From: John Koleszar Date: Wed, 7 Nov 2012 00:59:01 +0000 (-0800) Subject: Rough merge of master into experimental X-Git-Tag: v1.3.0~1217^2~124 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7b8dfcb5a2cfb01ee7a6009d945d06559b564d06;p=libvpx Rough merge of master into experimental Creates a merge between the master and experimental branches. Fixes a number of conflicts in the build system to allow *either* VP8 or VP9 to be built. Specifically either: $ configure --disable-vp9 $ configure --disable-vp8 --disable-unit-tests VP9 still exports its symbols and files as VP8, so that will be resolved in the next commit. Unit tests are broken in VP9, but this isn't a new issue. They are fixed upstream on origin/experimental as of this writing, but rebasing this merge proved difficult, so will tackle that in a second merge commit. Change-Id: I2b7d852c18efd58d1ebc621b8041fe0260442c21 --- 7b8dfcb5a2cfb01ee7a6009d945d06559b564d06 diff --cc configure index 638d0df1e,b3c5fe90d..bd3bf1641 --- a/configure +++ b/configure @@@ -31,7 -33,7 +33,8 @@@ Advanced options ${toggle_debug_libs} in/exclude debug version of libraries ${toggle_md5} support for output of checksum data ${toggle_static_msvcrt} use static MSVCRT (VS builds only) + ${toggle_vp8} VP8 codec support + ${toggle_vp9} VP9 codec support ${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders) ${toggle_mem_tracker} track memory usage ${toggle_postproc} postprocessing @@@ -158,20 -171,24 +172,29 @@@ enable optimization enable fast_unaligned #allow unaligned accesses, if supported by hw enable md5 enable spatial_resampling + enable multithread enable os_support + enable temporal_denoising [ -d ${source_path}/../include ] && enable alt_tree_layout - for d in vp9; do -for d in vp8; do ++for d in vp8 vp9; do [ -d ${source_path}/${d} ] && disable alt_tree_layout; done if ! 
enabled alt_tree_layout; then # development environment + [ -d ${source_path}/vp8 ] && CODECS="${CODECS} vp8_encoder vp8_decoder" +[ -d ${source_path}/vp9 ] && CODECS="${CODECS} vp9_encoder vp9_decoder" else # customer environment - [ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp9_encoder" - [ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp9_decoder" + [ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp8_encoder" + [ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder" ++[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder" ++[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder" + [ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder + [ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder ++[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder ++[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder [ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt fi @@@ -265,8 -274,8 +291,10 @@@ CONFIG_LIST= postproc_visualizer os_support unit_tests + multi_res_encoding + temporal_denoising + experimental + ${EXPERIMENT_LIST} " CMDLINE_SELECT=" extra_warnings @@@ -306,7 -320,8 +339,9 @@@ small postproc_visualizer unit_tests + multi_res_encoding + temporal_denoising + experimental " process_cmdline() { @@@ -512,8 -537,10 +569,11 @@@ process_toolchain() check_add_cflags -Wpointer-arith check_add_cflags -Wtype-limits check_add_cflags -Wcast-qual - check_add_cflags -Wundef + check_add_cflags -Wvla + check_add_cflags -Wimplicit-function-declaration + check_add_cflags -Wuninitialized + check_add_cflags -Wunused-variable + check_add_cflags -Wunused-but-set-variable enabled extra_warnings || check_add_cflags -Wno-unused-function fi diff --cc examples.mk index 74fb68156,90913e67e..0d4b4d5a9 --- a/examples.mk +++ b/examples.mk @@@ -97,10 -114,10 +114,12 @@@ vp8_multi_resolution_encoder.DESCRIPTIO # We should not link to math library (libm) on RVCT # when building for bare-metal targets ifeq ($(CONFIG_OS_SUPPORT), yes) + CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m +CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m else ifeq ($(CONFIG_GCC), yes) + CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m + CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m endif endif # @@@ -117,8 -134,8 +136,10 @@@ ifeq ($(HAVE_ALT_TREE_LAYOUT),yes INC_PATH := $(SRC_PATH_BARE)/../include else LIB_PATH-yes += $(if $(BUILD_PFX),$(BUILD_PFX),.) 
+ INC_PATH-$(CONFIG_VP8_DECODER) += $(SRC_PATH_BARE)/vp8 + INC_PATH-$(CONFIG_VP8_ENCODER) += $(SRC_PATH_BARE)/vp8 + INC_PATH-$(CONFIG_VP9_DECODER) += $(SRC_PATH_BARE)/vp9 + INC_PATH-$(CONFIG_VP9_ENCODER) += $(SRC_PATH_BARE)/vp9 LIB_PATH := $(call enabled,LIB_PATH) INC_PATH := $(call enabled,INC_PATH) endif diff --cc libmkv/EbmlIDs.h index c6c4a6960,e3ce5856f..4920bf9ba --- a/libmkv/EbmlIDs.h +++ b/libmkv/EbmlIDs.h @@@ -10,221 -10,222 +10,221 @@@ #ifndef MKV_DEFS_HPP #define MKV_DEFS_HPP 1 - // Commenting out values not available in webm, but available in matroska + /* Commenting out values not available in webm, but available in matroska */ -enum mkv -{ - EBML = 0x1A45DFA3, - EBMLVersion = 0x4286, - EBMLReadVersion = 0x42F7, - EBMLMaxIDLength = 0x42F2, - EBMLMaxSizeLength = 0x42F3, - DocType = 0x4282, - DocTypeVersion = 0x4287, - DocTypeReadVersion = 0x4285, +enum mkv { + EBML = 0x1A45DFA3, + EBMLVersion = 0x4286, + EBMLReadVersion = 0x42F7, + EBMLMaxIDLength = 0x42F2, + EBMLMaxSizeLength = 0x42F3, + DocType = 0x4282, + DocTypeVersion = 0x4287, + DocTypeReadVersion = 0x4285, - // CRC_32 = 0xBF, + /* CRC_32 = 0xBF, */ - Void = 0xEC, - SignatureSlot = 0x1B538667, - SignatureAlgo = 0x7E8A, - SignatureHash = 0x7E9A, - SignaturePublicKey = 0x7EA5, - Signature = 0x7EB5, - SignatureElements = 0x7E5B, - SignatureElementList = 0x7E7B, - SignedElement = 0x6532, - /* segment */ - Segment = 0x18538067, - /* Meta Seek Information */ - SeekHead = 0x114D9B74, - Seek = 0x4DBB, - SeekID = 0x53AB, - SeekPosition = 0x53AC, - /* Segment Information */ - Info = 0x1549A966, + Void = 0xEC, + SignatureSlot = 0x1B538667, + SignatureAlgo = 0x7E8A, + SignatureHash = 0x7E9A, + SignaturePublicKey = 0x7EA5, + Signature = 0x7EB5, + SignatureElements = 0x7E5B, + SignatureElementList = 0x7E7B, + SignedElement = 0x6532, - // segment ++ /* segment */ + Segment = 0x18538067, - // Meta Seek Information ++ /* Meta Seek Information */ + SeekHead = 0x114D9B74, + Seek = 0x4DBB, + SeekID = 0x53AB, + SeekPosition = 0x53AC, - // Segment Information ++ /* Segment Information */ + Info = 0x1549A966, - // SegmentUID = 0x73A4, - // SegmentFilename = 0x7384, - // PrevUID = 0x3CB923, - // PrevFilename = 0x3C83AB, - // NextUID = 0x3EB923, - // NextFilename = 0x3E83BB, - // SegmentFamily = 0x4444, - // ChapterTranslate = 0x6924, - // ChapterTranslateEditionUID = 0x69FC, - // ChapterTranslateCodec = 0x69BF, - // ChapterTranslateID = 0x69A5, + /* SegmentUID = 0x73A4, */ + /* SegmentFilename = 0x7384, */ + /* PrevUID = 0x3CB923, */ + /* PrevFilename = 0x3C83AB, */ + /* NextUID = 0x3EB923, */ + /* NextFilename = 0x3E83BB, */ + /* SegmentFamily = 0x4444, */ + /* ChapterTranslate = 0x6924, */ + /* ChapterTranslateEditionUID = 0x69FC, */ + /* ChapterTranslateCodec = 0x69BF, */ + /* ChapterTranslateID = 0x69A5, */ - TimecodeScale = 0x2AD7B1, - Segment_Duration = 0x4489, - DateUTC = 0x4461, + TimecodeScale = 0x2AD7B1, + Segment_Duration = 0x4489, + DateUTC = 0x4461, - // Title = 0x7BA9, + /* Title = 0x7BA9, */ - MuxingApp = 0x4D80, - WritingApp = 0x5741, - /* Cluster */ - Cluster = 0x1F43B675, - Timecode = 0xE7, + MuxingApp = 0x4D80, + WritingApp = 0x5741, - // Cluster ++ /* Cluster */ + Cluster = 0x1F43B675, + Timecode = 0xE7, - // SilentTracks = 0x5854, - // SilentTrackNumber = 0x58D7, - // Position = 0xA7, + /* SilentTracks = 0x5854, */ + /* SilentTrackNumber = 0x58D7, */ + /* Position = 0xA7, */ - PrevSize = 0xAB, - BlockGroup = 0xA0, - Block = 0xA1, + PrevSize = 0xAB, + BlockGroup = 0xA0, + Block = 0xA1, - // BlockVirtual = 0xA2, - // 
BlockAdditions = 0x75A1, - // BlockMore = 0xA6, - // BlockAddID = 0xEE, - // BlockAdditional = 0xA5, + /* BlockVirtual = 0xA2, */ + /* BlockAdditions = 0x75A1, */ + /* BlockMore = 0xA6, */ + /* BlockAddID = 0xEE, */ + /* BlockAdditional = 0xA5, */ - BlockDuration = 0x9B, + BlockDuration = 0x9B, - // ReferencePriority = 0xFA, + /* ReferencePriority = 0xFA, */ - ReferenceBlock = 0xFB, + ReferenceBlock = 0xFB, - // ReferenceVirtual = 0xFD, - // CodecState = 0xA4, - // Slices = 0x8E, - // TimeSlice = 0xE8, + /* ReferenceVirtual = 0xFD, */ + /* CodecState = 0xA4, */ + /* Slices = 0x8E, */ + /* TimeSlice = 0xE8, */ - LaceNumber = 0xCC, + LaceNumber = 0xCC, - // FrameNumber = 0xCD, - // BlockAdditionID = 0xCB, - // MkvDelay = 0xCE, - // Cluster_Duration = 0xCF, + /* FrameNumber = 0xCD, */ + /* BlockAdditionID = 0xCB, */ + /* MkvDelay = 0xCE, */ + /* Cluster_Duration = 0xCF, */ - SimpleBlock = 0xA3, + SimpleBlock = 0xA3, - // EncryptedBlock = 0xAF, - // Track + /* EncryptedBlock = 0xAF, */ - /* Track */ - Tracks = 0x1654AE6B, - TrackEntry = 0xAE, - TrackNumber = 0xD7, - TrackUID = 0x73C5, - TrackType = 0x83, - FlagEnabled = 0xB9, - FlagDefault = 0x88, - FlagForced = 0x55AA, - FlagLacing = 0x9C, ++ /* Track */ + Tracks = 0x1654AE6B, + TrackEntry = 0xAE, + TrackNumber = 0xD7, + TrackUID = 0x73C5, + TrackType = 0x83, + FlagEnabled = 0xB9, + FlagDefault = 0x88, + FlagForced = 0x55AA, + FlagLacing = 0x9C, - // MinCache = 0x6DE7, - // MaxCache = 0x6DF8, + /* MinCache = 0x6DE7, */ + /* MaxCache = 0x6DF8, */ - DefaultDuration = 0x23E383, + DefaultDuration = 0x23E383, - // TrackTimecodeScale = 0x23314F, - // TrackOffset = 0x537F, - // MaxBlockAdditionID = 0x55EE, + /* TrackTimecodeScale = 0x23314F, */ + /* TrackOffset = 0x537F, */ + /* MaxBlockAdditionID = 0x55EE, */ - Name = 0x536E, - Language = 0x22B59C, - CodecID = 0x86, - CodecPrivate = 0x63A2, - CodecName = 0x258688, + Name = 0x536E, + Language = 0x22B59C, + CodecID = 0x86, + CodecPrivate = 0x63A2, + CodecName = 0x258688, - // AttachmentLink = 0x7446, - // CodecSettings = 0x3A9697, - // CodecInfoURL = 0x3B4040, - // CodecDownloadURL = 0x26B240, - // CodecDecodeAll = 0xAA, - // TrackOverlay = 0x6FAB, - // TrackTranslate = 0x6624, - // TrackTranslateEditionUID = 0x66FC, - // TrackTranslateCodec = 0x66BF, - // TrackTranslateTrackID = 0x66A5, - // video + /* AttachmentLink = 0x7446, */ + /* CodecSettings = 0x3A9697, */ + /* CodecInfoURL = 0x3B4040, */ + /* CodecDownloadURL = 0x26B240, */ + /* CodecDecodeAll = 0xAA, */ + /* TrackOverlay = 0x6FAB, */ + /* TrackTranslate = 0x6624, */ + /* TrackTranslateEditionUID = 0x66FC, */ + /* TrackTranslateCodec = 0x66BF, */ + /* TrackTranslateTrackID = 0x66A5, */ - /* video */ - Video = 0xE0, - FlagInterlaced = 0x9A, - StereoMode = 0x53B8, - PixelWidth = 0xB0, - PixelHeight = 0xBA, - PixelCropBottom = 0x54AA, - PixelCropTop = 0x54BB, - PixelCropLeft = 0x54CC, - PixelCropRight = 0x54DD, - DisplayWidth = 0x54B0, - DisplayHeight = 0x54BA, - DisplayUnit = 0x54B2, - AspectRatioType = 0x54B3, ++ /* video */ + Video = 0xE0, + FlagInterlaced = 0x9A, + StereoMode = 0x53B8, + PixelWidth = 0xB0, + PixelHeight = 0xBA, + PixelCropBottom = 0x54AA, + PixelCropTop = 0x54BB, + PixelCropLeft = 0x54CC, + PixelCropRight = 0x54DD, + DisplayWidth = 0x54B0, + DisplayHeight = 0x54BA, + DisplayUnit = 0x54B2, + AspectRatioType = 0x54B3, - // ColourSpace = 0x2EB524, - // GammaValue = 0x2FB523, + /* ColourSpace = 0x2EB524, */ + /* GammaValue = 0x2FB523, */ - FrameRate = 0x2383E3, - /* end video */ - /* audio */ - Audio = 0xE1, - SamplingFrequency 
= 0xB5, - OutputSamplingFrequency = 0x78B5, - Channels = 0x9F, + FrameRate = 0x2383E3, - // end video - // audio ++ /* end video */ ++ /* audio */ + Audio = 0xE1, + SamplingFrequency = 0xB5, + OutputSamplingFrequency = 0x78B5, + Channels = 0x9F, - // ChannelPositions = 0x7D7B, + /* ChannelPositions = 0x7D7B, */ - BitDepth = 0x6264, - /* end audio */ - /* content encoding */ + BitDepth = 0x6264, - // end audio - // content encoding - // ContentEncodings = 0x6d80, - // ContentEncoding = 0x6240, - // ContentEncodingOrder = 0x5031, - // ContentEncodingScope = 0x5032, - // ContentEncodingType = 0x5033, - // ContentCompression = 0x5034, - // ContentCompAlgo = 0x4254, - // ContentCompSettings = 0x4255, - // ContentEncryption = 0x5035, - // ContentEncAlgo = 0x47e1, - // ContentEncKeyID = 0x47e2, - // ContentSignature = 0x47e3, - // ContentSigKeyID = 0x47e4, - // ContentSigAlgo = 0x47e5, - // ContentSigHashAlgo = 0x47e6, - // end content encoding - // Cueing Data ++ /* end audio */ ++ /* content encoding */ + /* ContentEncodings = 0x6d80, */ + /* ContentEncoding = 0x6240, */ + /* ContentEncodingOrder = 0x5031, */ + /* ContentEncodingScope = 0x5032, */ + /* ContentEncodingType = 0x5033, */ + /* ContentCompression = 0x5034, */ + /* ContentCompAlgo = 0x4254, */ + /* ContentCompSettings = 0x4255, */ + /* ContentEncryption = 0x5035, */ + /* ContentEncAlgo = 0x47e1, */ + /* ContentEncKeyID = 0x47e2, */ + /* ContentSignature = 0x47e3, */ + /* ContentSigKeyID = 0x47e4, */ + /* ContentSigAlgo = 0x47e5, */ + /* ContentSigHashAlgo = 0x47e6, */ - /* end content encoding */ - /* Cueing Data */ - Cues = 0x1C53BB6B, - CuePoint = 0xBB, - CueTime = 0xB3, - CueTrackPositions = 0xB7, - CueTrack = 0xF7, - CueClusterPosition = 0xF1, - CueBlockNumber = 0x5378 ++ /* end content encoding */ ++ /* Cueing Data */ + Cues = 0x1C53BB6B, + CuePoint = 0xBB, + CueTime = 0xB3, + CueTrackPositions = 0xB7, + CueTrack = 0xF7, + CueClusterPosition = 0xF1, - CueBlockNumber = 0x5378, - // CueCodecState = 0xEA, - // CueReference = 0xDB, - // CueRefTime = 0x96, - // CueRefCluster = 0x97, - // CueRefNumber = 0x535F, - // CueRefCodecState = 0xEB, - // Attachment - // Attachments = 0x1941A469, - // AttachedFile = 0x61A7, - // FileDescription = 0x467E, - // FileName = 0x466E, - // FileMimeType = 0x4660, - // FileData = 0x465C, - // FileUID = 0x46AE, - // FileReferral = 0x4675, - // Chapters - // Chapters = 0x1043A770, - // EditionEntry = 0x45B9, - // EditionUID = 0x45BC, - // EditionFlagHidden = 0x45BD, - // EditionFlagDefault = 0x45DB, - // EditionFlagOrdered = 0x45DD, - // ChapterAtom = 0xB6, - // ChapterUID = 0x73C4, - // ChapterTimeStart = 0x91, - // ChapterTimeEnd = 0x92, - // ChapterFlagHidden = 0x98, - // ChapterFlagEnabled = 0x4598, - // ChapterSegmentUID = 0x6E67, - // ChapterSegmentEditionUID = 0x6EBC, - // ChapterPhysicalEquiv = 0x63C3, - // ChapterTrack = 0x8F, - // ChapterTrackNumber = 0x89, - // ChapterDisplay = 0x80, - // ChapString = 0x85, - // ChapLanguage = 0x437C, - // ChapCountry = 0x437E, - // ChapProcess = 0x6944, - // ChapProcessCodecID = 0x6955, - // ChapProcessPrivate = 0x450D, - // ChapProcessCommand = 0x6911, - // ChapProcessTime = 0x6922, - // ChapProcessData = 0x6933, - // Tagging - // Tags = 0x1254C367, - // Tag = 0x7373, - // Targets = 0x63C0, - // TargetTypeValue = 0x68CA, - // TargetType = 0x63CA, - // Tagging_TrackUID = 0x63C5, - // Tagging_EditionUID = 0x63C9, - // Tagging_ChapterUID = 0x63C4, - // AttachmentUID = 0x63C6, - // SimpleTag = 0x67C8, - // TagName = 0x45A3, - // TagLanguage = 0x447A, - // 
TagDefault = 0x4484, - // TagString = 0x4487, - // TagBinary = 0x4485, ++ CueBlockNumber = 0x5378 + /* CueCodecState = 0xEA, */ + /* CueReference = 0xDB, */ + /* CueRefTime = 0x96, */ + /* CueRefCluster = 0x97, */ + /* CueRefNumber = 0x535F, */ + /* CueRefCodecState = 0xEB, */ - /* Attachment */ ++ /* Attachment */ + /* Attachments = 0x1941A469, */ + /* AttachedFile = 0x61A7, */ + /* FileDescription = 0x467E, */ + /* FileName = 0x466E, */ + /* FileMimeType = 0x4660, */ + /* FileData = 0x465C, */ + /* FileUID = 0x46AE, */ + /* FileReferral = 0x4675, */ - /* Chapters */ ++ /* Chapters */ + /* Chapters = 0x1043A770, */ + /* EditionEntry = 0x45B9, */ + /* EditionUID = 0x45BC, */ + /* EditionFlagHidden = 0x45BD, */ + /* EditionFlagDefault = 0x45DB, */ + /* EditionFlagOrdered = 0x45DD, */ + /* ChapterAtom = 0xB6, */ + /* ChapterUID = 0x73C4, */ + /* ChapterTimeStart = 0x91, */ + /* ChapterTimeEnd = 0x92, */ + /* ChapterFlagHidden = 0x98, */ + /* ChapterFlagEnabled = 0x4598, */ + /* ChapterSegmentUID = 0x6E67, */ + /* ChapterSegmentEditionUID = 0x6EBC, */ + /* ChapterPhysicalEquiv = 0x63C3, */ + /* ChapterTrack = 0x8F, */ + /* ChapterTrackNumber = 0x89, */ + /* ChapterDisplay = 0x80, */ + /* ChapString = 0x85, */ + /* ChapLanguage = 0x437C, */ + /* ChapCountry = 0x437E, */ + /* ChapProcess = 0x6944, */ + /* ChapProcessCodecID = 0x6955, */ + /* ChapProcessPrivate = 0x450D, */ + /* ChapProcessCommand = 0x6911, */ + /* ChapProcessTime = 0x6922, */ + /* ChapProcessData = 0x6933, */ - /* Tagging */ ++ /* Tagging */ + /* Tags = 0x1254C367, */ + /* Tag = 0x7373, */ + /* Targets = 0x63C0, */ + /* TargetTypeValue = 0x68CA, */ + /* TargetType = 0x63CA, */ + /* Tagging_TrackUID = 0x63C5, */ + /* Tagging_EditionUID = 0x63C9, */ + /* Tagging_ChapterUID = 0x63C4, */ + /* AttachmentUID = 0x63C6, */ + /* SimpleTag = 0x67C8, */ + /* TagName = 0x45A3, */ + /* TagLanguage = 0x447A, */ + /* TagDefault = 0x4484, */ + /* TagString = 0x4487, */ + /* TagBinary = 0x4485, */ }; #endif diff --cc libmkv/EbmlWriter.c index 69039e1bf,d70f06e43..5fc5ed2a3 --- a/libmkv/EbmlWriter.c +++ b/libmkv/EbmlWriter.c @@@ -18,136 -18,158 +18,140 @@@ #define LITERALU64(n) n##LLU #endif - void Ebml_WriteLen(EbmlGlobal *glob, long long val) { - // TODO check and make sure we are not > than 0x0100000000000000LLU - unsigned char size = 8; // size in bytes to output - unsigned long long minVal = LITERALU64(0x00000000000000ff); // mask to compare for byte size -void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) -{ - /* TODO check and make sure we are not > than 0x0100000000000000LLU */ - unsigned char size = 8; /* size in bytes to output */ ++void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) { ++ /* TODO check and make sure we are not > than 0x0100000000000000LLU */ ++ unsigned char size = 8; /* size in bytes to output */ + - /* mask to compare for byte size */ - int64_t minVal = 0xff; ++ /* mask to compare for byte size */ ++ int64_t minVal = 0xff; - for (size = 1; size < 8; size ++) - { - if (val < minVal) - break; + for (size = 1; size < 8; size ++) { + if (val < minVal) + break; - minVal = (minVal << 7); - } + minVal = (minVal << 7); + } - val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7)); - val |= (((uint64_t)0x80) << ((size - 1) * 7)); ++ val |= (((uint64_t)0x80) << ((size - 1) * 7)); - Ebml_Serialize(glob, (void *) &val, sizeof(val), size); + Ebml_Serialize(glob, (void *) &val, sizeof(val), size); } -void Ebml_WriteString(EbmlGlobal *glob, const char *str) -{ - const size_t size_ = strlen(str); - const uint64_t size = size_; - 
Ebml_WriteLen(glob, size); - /* TODO: it's not clear from the spec whether the nul terminator - * should be serialized too. For now we omit the null terminator. - */ - Ebml_Write(glob, str, (unsigned long)size); +void Ebml_WriteString(EbmlGlobal *glob, const char *str) { + const size_t size_ = strlen(str); - const unsigned long long size = size_; ++ const uint64_t size = size_; + Ebml_WriteLen(glob, size); - // TODO: it's not clear from the spec whether the nul terminator - // should be serialized too. For now we omit the null terminator. - Ebml_Write(glob, str, size); ++ /* TODO: it's not clear from the spec whether the nul terminator ++ * should be serialized too. For now we omit the null terminator. ++ */ ++ Ebml_Write(glob, str, (unsigned long)size); } -void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) -{ - const size_t strlen = wcslen(wstr); +void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) { + const size_t strlen = wcslen(wstr); - // TODO: it's not clear from the spec whether the nul terminator - // should be serialized too. For now we include it. - const unsigned long long size = strlen; - /* TODO: it's not clear from the spec whether the nul terminator - * should be serialized too. For now we include it. - */ - const uint64_t size = strlen; ++ /* TODO: it's not clear from the spec whether the nul terminator ++ * should be serialized too. For now we include it. ++ */ ++ const uint64_t size = strlen; - Ebml_WriteLen(glob, size); - Ebml_Write(glob, wstr, (unsigned long)size); + Ebml_WriteLen(glob, size); - Ebml_Write(glob, wstr, size); ++ Ebml_Write(glob, wstr, (unsigned long)size); } -void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) -{ - int len; +void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) { + int len; - if (class_id >= 0x01000000) - len = 4; - else if (class_id >= 0x00010000) - len = 3; - else if (class_id >= 0x00000100) - len = 2; - else - len = 1; + if (class_id >= 0x01000000) + len = 4; + else if (class_id >= 0x00010000) + len = 3; + else if (class_id >= 0x00000100) + len = 2; + else + len = 1; - Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len); + Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len); } -void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) -{ - unsigned char sizeSerialized = 8 | 0x80; - Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); - Ebml_Serialize(glob, &ui, sizeof(ui), 8); +void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) { + unsigned char sizeSerialized = 8 | 0x80; + Ebml_WriteID(glob, class_id); + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), 8); } -void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) -{ - unsigned char size = 8; /* size in bytes to output */ - unsigned char sizeSerialized = 0; - unsigned long minVal; - - Ebml_WriteID(glob, class_id); - minVal = 0x7fLU; /* mask to compare for byte size */ +void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) { - unsigned char size = 8; // size in bytes to output ++ unsigned char size = 8; /* size in bytes to output */ + unsigned char sizeSerialized = 0; + unsigned long minVal; - for (size = 1; size < 4; size ++) - { - if (ui < minVal) - { - break; - } + Ebml_WriteID(glob, class_id); - minVal = 0x7fLU; // mask to compare for byte size ++ minVal = 0x7fLU; /* mask to compare for byte size */ - 
minVal <<= 7; + for (size = 1; size < 4; size ++) { + if (ui < minVal) { + break; } - sizeSerialized = 0x80 | size; - Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); - Ebml_Serialize(glob, &ui, sizeof(ui), size); + minVal <<= 7; + } + + sizeSerialized = 0x80 | size; + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), size); } - // TODO: perhaps this is a poor name for this id serializer helper function + /* TODO: perhaps this is a poor name for this id serializer helper function */ -void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) -{ - int size; - for (size=4; size > 1; size--) - { - if (bin & 0x000000ff << ((size-1) * 8)) - break; - } - Ebml_WriteID(glob, class_id); - Ebml_WriteLen(glob, size); - Ebml_WriteID(glob, bin); +void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) { + int size; + for (size = 4; size > 1; size--) { + if (bin & 0x000000ff << ((size - 1) * 8)) + break; + } + Ebml_WriteID(glob, class_id); + Ebml_WriteLen(glob, size); + Ebml_WriteID(glob, bin); } -void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) -{ - unsigned char len = 0x88; +void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) { + unsigned char len = 0x88; - Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &len, sizeof(len), 1); - Ebml_Serialize(glob, &d, sizeof(d), 8); + Ebml_WriteID(glob, class_id); + Ebml_Serialize(glob, &len, sizeof(len), 1); + Ebml_Serialize(glob, &d, sizeof(d), 8); } -void Ebml_WriteSigned16(EbmlGlobal *glob, short val) -{ - signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8; - Ebml_Serialize(glob, &out, sizeof(out), 3); +void Ebml_WriteSigned16(EbmlGlobal *glob, short val) { + signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8; + Ebml_Serialize(glob, &out, sizeof(out), 3); } -void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) -{ - Ebml_WriteID(glob, class_id); - Ebml_WriteString(glob, s); +void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) { + Ebml_WriteID(glob, class_id); + Ebml_WriteString(glob, s); } -void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) -{ - Ebml_WriteID(glob, class_id); - Ebml_WriteUTF8(glob, s); +void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) { + Ebml_WriteID(glob, class_id); + Ebml_WriteUTF8(glob, s); } -void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) -{ - Ebml_WriteID(glob, class_id); - Ebml_WriteLen(glob, data_length); - Ebml_Write(glob, data, data_length); +void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) { + Ebml_WriteID(glob, class_id); + Ebml_WriteLen(glob, data_length); + Ebml_Write(glob, data, data_length); } -void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) -{ - unsigned char tmp = 0; - unsigned long i = 0; +void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) { + unsigned char tmp = 0; + unsigned long i = 0; - Ebml_WriteID(glob, 0xEC); - Ebml_WriteLen(glob, vSize); + Ebml_WriteID(glob, 0xEC); + Ebml_WriteLen(glob, vSize); - for (i = 0; i < vSize; i++) - { - Ebml_Write(glob, &tmp, 1); - } + for (i = 0; i < vSize; i++) { + Ebml_Write(glob, &tmp, 1); + } } - // TODO Serialize Date + /* TODO Serialize Date */ diff --cc libs.mk index abb7a8e3d,373c1cd44..9af6a35c7 --- a/libs.mk +++ b/libs.mk 
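
An aside on the EbmlWriter.c hunk above, before the build-system changes
continue: the loop in Ebml_WriteLen() computes a standard EBML
variable-length integer, picking the smallest byte count whose length
marker bit still sits above the payload bits. A minimal standalone C
sketch of that scheme (the helper name and signature are illustrative,
not part of this patch):

    #include <stdint.h>

    /* Illustrative only. Returns val with the EBML length-descriptor bit
     * set and reports in *size how many bytes to serialize (big-endian),
     * mirroring what Ebml_WriteLen() computes before Ebml_Serialize().
     */
    static uint64_t ebml_mark_length(int64_t val, unsigned char *size) {
      int64_t min_val = 0xff;      /* capacity threshold at one byte */
      for (*size = 1; *size < 8; ++*size) {
        if (val < min_val)
          break;
        min_val <<= 7;             /* each extra byte adds 7 payload bits */
      }
      /* The marker sits just above the payload: 1xxxxxxx for one byte,
       * 01xxxxxx xxxxxxxx for two bytes, and so on.
       */
      return (uint64_t)val | ((uint64_t)0x80 << ((*size - 1) * 7));
    }
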
@@@ -17,6 -17,6 +17,34 @@@ els ASM:=.asm endif ++ ++# ++# Calculate platform- and compiler-specific offsets for hand coded assembly ++# ++ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC)) ++OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU' ++define asm_offsets_template ++$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S ++ @echo " [CREATE] $$@" ++ $$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@ ++$$(BUILD_PFX)$(2).S: $(2) ++CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S ++endef ++else ++ ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC)) ++define asm_offsets_template ++$$(BUILD_PFX)$(1): obj_int_extract ++$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o ++ @echo " [CREATE] $$@" ++ $$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@ ++OBJS-yes += $$(BUILD_PFX)$(2).o ++CLEAN-OBJS += $$(BUILD_PFX)$(1) ++$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1) ++endef ++endif # rvct ++endif # !gcc ++ ++ CODEC_SRCS-yes += CHANGELOG CODEC_SRCS-yes += libs.mk @@@ -29,32 -29,32 +57,64 @@@ CODEC_SRCS-yes += $(addprefix vpx_mem/, include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS)) ++ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),) ++ VP8_PREFIX=vp8/ ++ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk ++endif + + ifeq ($(CONFIG_VP8_ENCODER),yes) - VP8_PREFIX=vp8/ + include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk + CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS)) + CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS)) + CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h - CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp8cx_arm.mk ++ CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp88cx_arm.mk + INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h + INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% + CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h + CODEC_DOC_SECTIONS += vp8 vp8_encoder + endif + + ifeq ($(CONFIG_VP8_DECODER),yes) - VP8_PREFIX=vp8/ + include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk + CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS)) + CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS)) + CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h + INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h + INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% + CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h + CODEC_DOC_SECTIONS += vp8 vp8_decoder + endif + ++ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),) ++ VP9_PREFIX=vp9/ ++ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk ++endif + +ifeq ($(CONFIG_VP9_ENCODER),yes) + VP9_PREFIX=vp9/ + include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx.mk + CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS)) + CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS)) - CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h vpx/vp8e.h ++ CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h + CODEC_SRCS-$(ARCH_ARM) += $(VP9_PREFIX)vp98cx_arm.mk - INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8e.h include/vpx/vp8cx.h ++ INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h + INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% + CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h + CODEC_DOC_SECTIONS += vp9 vp9_encoder +endif + +ifeq ($(CONFIG_VP9_DECODER),yes) + VP9_PREFIX=vp9/ + include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx.mk + CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_DX_SRCS)) + CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_DX_EXPORTS)) + 
CODEC_SRCS-yes += $(VP9_PREFIX)vp9dx.mk vpx/vp8.h vpx/vp8dx.h + INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h + INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% + CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h + CODEC_DOC_SECTIONS += vp9 vp9_decoder +endif + ifeq ($(CONFIG_ENCODERS),yes) CODEC_DOC_SECTIONS += encoder @@@ -172,9 -170,9 +231,9 @@@ CLEAN-OBJS += vpx.de vpx.vcproj: $(CODEC_SRCS) vpx.def @echo " [CREATE] $@" - $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ + $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \ - --lib \ - --target=$(TOOLCHAIN) \ + $(if $(CONFIG_SHARED),--dll,--lib) \ + --target=$(TOOLCHAIN) \ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ --name=vpx \ --proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \ @@@ -263,7 -242,8 +322,8 @@@ vpx.pc: config.mk libs.m $(qexec)echo 'Version: $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)' >> $@ $(qexec)echo 'Requires:' >> $@ $(qexec)echo 'Conflicts:' >> $@ - $(qexec)echo 'Libs: -L$${libdir} -lvpx' >> $@ + $(qexec)echo 'Libs: -L$${libdir} -lvpx -lm' >> $@ + $(qexec)echo 'Libs.private: -lm -lpthread' >> $@ $(qexec)echo 'Cflags: -I$${includedir}' >> $@ INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc @@@ -298,57 -278,57 +358,6 @@@ endi $(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm --# --# Calculate platform- and compiler-specific offsets for hand coded assembly --# -- --OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU' -- --ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC)) - $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S - $(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S -- @echo " [CREATE] $@" -- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S: $(VP9_PREFIX)common/asm_com_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S - $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S: $(VP8_PREFIX)common/asm_com_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)common/asm_com_offsets.c.S -- - $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S - $(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S -- @echo " [CREATE] $@" -- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S: $(VP9_PREFIX)encoder/asm_enc_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S - $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S: $(VP8_PREFIX)encoder/asm_enc_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP8_PREFIX)encoder/asm_enc_offsets.c.S -- - $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S - $(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S -- @echo " [CREATE] $@" -- $(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@ - $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S: $(VP9_PREFIX)decoder/asm_dec_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S - $(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S: $(VP8_PREFIX)decoder/asm_dec_offsets.c - CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm 
$(BUILD_PFX)$(VP8_PREFIX)decoder/asm_dec_offsets.c.S --else -- ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC)) -- asm_com_offsets.asm: obj_int_extract - asm_com_offsets.asm: $(VP9_PREFIX)common/asm_com_offsets.c.o - asm_com_offsets.asm: $(VP8_PREFIX)common/asm_com_offsets.c.o -- @echo " [CREATE] $@" -- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP9_PREFIX)common/asm_com_offsets.c.o - OBJS-yes += $(VP8_PREFIX)common/asm_com_offsets.c.o -- CLEAN-OBJS += asm_com_offsets.asm -- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm -- -- asm_enc_offsets.asm: obj_int_extract - asm_enc_offsets.asm: $(VP9_PREFIX)encoder/asm_enc_offsets.c.o - asm_enc_offsets.asm: $(VP8_PREFIX)encoder/asm_enc_offsets.c.o -- @echo " [CREATE] $@" -- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP9_PREFIX)encoder/asm_enc_offsets.c.o - OBJS-yes += $(VP8_PREFIX)encoder/asm_enc_offsets.c.o -- CLEAN-OBJS += asm_enc_offsets.asm -- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm -- -- asm_dec_offsets.asm: obj_int_extract - asm_dec_offsets.asm: $(VP9_PREFIX)decoder/asm_dec_offsets.c.o - asm_dec_offsets.asm: $(VP8_PREFIX)decoder/asm_dec_offsets.c.o -- @echo " [CREATE] $@" -- $(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@ - OBJS-yes += $(VP9_PREFIX)decoder/asm_dec_offsets.c.o - OBJS-yes += $(VP8_PREFIX)decoder/asm_dec_offsets.c.o -- CLEAN-OBJS += asm_dec_offsets.asm -- $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm -- endif --endif $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h) CLEAN-OBJS += $(BUILD_PFX)vpx_version.h @@@ -356,15 -336,14 +365,15 @@@ # # Rule to generate runtime cpu detection files # - $(OBJS-yes:.o=.d): vpx_rtcd.h - vpx_rtcd.h: $(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS))) + $(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS))) @echo " [CREATE] $@" $(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \ - --sym=vpx_rtcd \ - --config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \ - $(RTCD_OPTIONS) $^ > $@ + --sym=vpx_rtcd \ + --config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \ + $(RTCD_OPTIONS) $^ > $@ CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h ++ CODEC_DOC_SRCS += vpx/vpx_codec.h \ vpx/vpx_decoder.h \ vpx/vpx_encoder.h \ diff --cc test/test.mk index 129c18862,7a11a2793..3c6d44c97 --- a/test/test.mk +++ b/test/test.mk @@@ -1,10 -1,178 +1,186 @@@ - LIBVPX_TEST_SRCS-yes += test.mk LIBVPX_TEST_SRCS-yes += acm_random.h - LIBVPX_TEST_SRCS-yes += boolcoder_test.cc - LIBVPX_TEST_SRCS-yes += dct16x16_test.cc - LIBVPX_TEST_SRCS-yes += fdct4x4_test.cc - LIBVPX_TEST_SRCS-yes += fdct8x8_test.cc - LIBVPX_TEST_SRCS-yes += idct8x8_test.cc + LIBVPX_TEST_SRCS-yes += test.mk LIBVPX_TEST_SRCS-yes += test_libvpx.cc + LIBVPX_TEST_SRCS-yes += util.h + LIBVPX_TEST_SRCS-yes += video_source.h + + ## + ## BLACK BOX TESTS + ## + ## Black box tests only use the public API. 
+ ## + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc + + LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c + LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h + LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h + LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc + ## + ## WHITE BOX TESTS + ## + ## Whitebox tests invoke functions not exposed via the public API. Certain + ## shared library builds don't make these functions accessible. + ## + ifeq ($(CONFIG_SHARED),) + + # These tests require both the encoder and decoder to be built. + ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) + LIBVPX_TEST_SRCS-yes += boolcoder_test.cc + endif + -LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += fdct4x4_test.cc + LIBVPX_TEST_SRCS-yes += idctllm_test.cc + LIBVPX_TEST_SRCS-yes += intrapred_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc + LIBVPX_TEST_SRCS-yes += sad_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc + LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc ++ ++# VP9 tests ++LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc ++LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc ++ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),) ++LIBVPX_TEST_SRCS-yes += idct8x8_test.cc ++endif + + endif + - LIBVPX_TEST_DATA-yes += hantro_collage_w352h288.yuv + ## + ## TEST DATA + ## + LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf + 
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5 + 
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5 + 
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5 + LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5 diff --cc test/test_libvpx.cc index 924aa2e2e,cfd5d2807..2b9b0c21f --- a/test/test_libvpx.cc +++ b/test/test_libvpx.cc @@@ -26,18 -26,18 +26,18 @@@ int main(int argc, char **argv) ::testing::InitGoogleTest(&argc, argv); #if ARCH_X86 || ARCH_X86_64 - int simd_caps = x86_simd_caps(); + const int simd_caps = x86_simd_caps(); - if(!(simd_caps & HAS_MMX)) + if (!(simd_caps & HAS_MMX)) append_gtest_filter(":-MMX/*"); - if(!(simd_caps & HAS_SSE)) + if (!(simd_caps & HAS_SSE)) append_gtest_filter(":-SSE/*"); - if(!(simd_caps & HAS_SSE2)) + if (!(simd_caps & HAS_SSE2)) append_gtest_filter(":-SSE2/*"); - if(!(simd_caps & HAS_SSE3)) + if (!(simd_caps & HAS_SSE3)) append_gtest_filter(":-SSE3/*"); - if(!(simd_caps & HAS_SSSE3)) + if (!(simd_caps & HAS_SSSE3)) append_gtest_filter(":-SSSE3/*"); - if(!(simd_caps & HAS_SSE4_1)) + if (!(simd_caps & HAS_SSE4_1)) append_gtest_filter(":-SSE4_1/*"); #endif diff --cc test/vp8_fdct4x4_test.cc index 000000000,000000000..619b23d22 new file mode 100644 --- /dev/null +++ b/test/vp8_fdct4x4_test.cc @@@ -1,0 -1,0 +1,169 @@@ ++/* ++* Copyright (c) 2012 The WebM project authors. All Rights Reserved. ++* ++* Use of this source code is governed by a BSD-style license ++* that can be found in the LICENSE file in the root of the source ++* tree. An additional intellectual property rights grant can be found ++* in the file PATENTS. All contributing project authors may ++* be found in the AUTHORS file in the root of the source tree. 
++*/ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++extern "C" { ++#include "vpx_rtcd.h" ++} ++ ++#include "test/acm_random.h" ++#include "third_party/googletest/src/include/gtest/gtest.h" ++#include "vpx/vpx_integer.h" ++ ++ ++namespace { ++ ++const int cospi8sqrt2minus1 = 20091; ++const int sinpi8sqrt2 = 35468; ++ ++void reference_idct4x4(const int16_t *input, int16_t *output) { ++ const int16_t *ip = input; ++ int16_t *op = output; ++ ++ for (int i = 0; i < 4; ++i) { ++ const int a1 = ip[0] + ip[8]; ++ const int b1 = ip[0] - ip[8]; ++ const int temp1 = (ip[4] * sinpi8sqrt2) >> 16; ++ const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); ++ const int c1 = temp1 - temp2; ++ const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); ++ const int temp4 = (ip[12] * sinpi8sqrt2) >> 16; ++ const int d1 = temp3 + temp4; ++ op[0] = a1 + d1; ++ op[12] = a1 - d1; ++ op[4] = b1 + c1; ++ op[8] = b1 - c1; ++ ++ip; ++ ++op; ++ } ++ ip = output; ++ op = output; ++ for (int i = 0; i < 4; ++i) { ++ const int a1 = ip[0] + ip[2]; ++ const int b1 = ip[0] - ip[2]; ++ const int temp1 = (ip[1] * sinpi8sqrt2) >> 16; ++ const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); ++ const int c1 = temp1 - temp2; ++ const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); ++ const int temp4 = (ip[3] * sinpi8sqrt2) >> 16; ++ const int d1 = temp3 + temp4; ++ op[0] = (a1 + d1 + 4) >> 3; ++ op[3] = (a1 - d1 + 4) >> 3; ++ op[1] = (b1 + c1 + 4) >> 3; ++ op[2] = (b1 - c1 + 4) >> 3; ++ ip += 4; ++ op += 4; ++ } ++} ++ ++using libvpx_test::ACMRandom; ++ ++TEST(Vp8FdctTest, SignBiasCheck) { ++ ACMRandom rnd(ACMRandom::DeterministicSeed()); ++ int16_t test_input_block[16]; ++ int16_t test_output_block[16]; ++ const int pitch = 8; ++ int count_sign_block[16][2]; ++ const int count_test_block = 1000000; ++ ++ memset(count_sign_block, 0, sizeof(count_sign_block)); ++ ++ for (int i = 0; i < count_test_block; ++i) { ++ // Initialize a test block with input range [-255, 255]. ++ for (int j = 0; j < 16; ++j) ++ test_input_block[j] = rnd.Rand8() - rnd.Rand8(); ++ ++ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); ++ ++ for (int j = 0; j < 16; ++j) { ++ if (test_output_block[j] < 0) ++ ++count_sign_block[j][0]; ++ else if (test_output_block[j] > 0) ++ ++count_sign_block[j][1]; ++ } ++ } ++ ++ bool bias_acceptable = true; ++ for (int j = 0; j < 16; ++j) ++ bias_acceptable = bias_acceptable && ++ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000); ++ ++ EXPECT_EQ(true, bias_acceptable) ++ << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]"; ++ ++ memset(count_sign_block, 0, sizeof(count_sign_block)); ++ ++ for (int i = 0; i < count_test_block; ++i) { ++ // Initialize a test block with input range [-15, 15]. 
++ for (int j = 0; j < 16; ++j) ++ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4); ++ ++ vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch); ++ ++ for (int j = 0; j < 16; ++j) { ++ if (test_output_block[j] < 0) ++ ++count_sign_block[j][0]; ++ else if (test_output_block[j] > 0) ++ ++count_sign_block[j][1]; ++ } ++ } ++ ++ bias_acceptable = true; ++ for (int j = 0; j < 16; ++j) ++ bias_acceptable = bias_acceptable && ++ (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000); ++ ++ EXPECT_EQ(true, bias_acceptable) ++ << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; ++}; ++ ++TEST(Vp8FdctTest, RoundTripErrorCheck) { ++ ACMRandom rnd(ACMRandom::DeterministicSeed()); ++ int max_error = 0; ++ double total_error = 0; ++ const int count_test_block = 1000000; ++ for (int i = 0; i < count_test_block; ++i) { ++ int16_t test_input_block[16]; ++ int16_t test_temp_block[16]; ++ int16_t test_output_block[16]; ++ ++ // Initialize a test block with input range [-255, 255]. ++ for (int j = 0; j < 16; ++j) ++ test_input_block[j] = rnd.Rand8() - rnd.Rand8(); ++ ++ const int pitch = 8; ++ vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch); ++ reference_idct4x4(test_temp_block, test_output_block); ++ ++ for (int j = 0; j < 16; ++j) { ++ const int diff = test_input_block[j] - test_output_block[j]; ++ const int error = diff * diff; ++ if (max_error < error) ++ max_error = error; ++ total_error += error; ++ } ++ } ++ ++ EXPECT_GE(1, max_error ) ++ << "Error: FDCT/IDCT has an individual roundtrip error > 1"; ++ ++ EXPECT_GE(count_test_block, total_error) ++ << "Error: FDCT/IDCT has average roundtrip error > 1 per block"; ++}; ++ ++} // namespace diff --cc tools/all_builds.py index d1f0c80c0,000000000..78581d9f0 mode 100755,000000..100755 --- a/tools/all_builds.py +++ b/tools/all_builds.py @@@ -1,72 -1,0 +1,72 @@@ +#!/usr/bin/python + +import getopt +import subprocess +import sys + +LONG_OPTIONS = ["shard=", "shards="] - BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental" ++BASE_COMMAND = "./configure --disable-vp8 --disable-unit-tests --enable-internal-stats --enable-experimental" + +def RunCommand(command): + run = subprocess.Popen(command, shell=True) + output = run.communicate() + if run.returncode: + print "Non-zero return code: " + str(run.returncode) + " => exiting!" 
+ sys.exit(1) + +def list_of_experiments(): + experiments = [] + configure_file = open("configure") + list_start = False + for line in configure_file.read().split("\n"): + if line == 'EXPERIMENT_LIST="': + list_start = True + elif line == '"': + list_start = False + elif list_start: + currently_broken = ["csm"] + experiment = line[4:] + if experiment not in currently_broken: + experiments.append(experiment) + return experiments + +def main(argv): + # Parse arguments + options = {"--shard": 0, "--shards": 1} + if "--" in argv: + opt_end_index = argv.index("--") + else: + opt_end_index = len(argv) + try: + o, _ = getopt.getopt(argv[1:opt_end_index], None, LONG_OPTIONS) + except getopt.GetoptError, err: + print str(err) + print "Usage: %s [--shard= --shards=] -- [configure flag ...]"%argv[0] + sys.exit(2) + + options.update(o) + extra_args = argv[opt_end_index + 1:] + + # Shard experiment list + shard = int(options["--shard"]) + shards = int(options["--shards"]) + experiments = list_of_experiments() + base_command = " ".join([BASE_COMMAND] + extra_args) + configs = [base_command] + configs += ["%s --enable-%s" % (base_command, e) for e in experiments] + my_configs = zip(configs, range(len(configs))) + my_configs = filter(lambda x: x[1] % shards == shard, my_configs) + my_configs = [e[0] for e in my_configs] + + # Run configs for this shard + for config in my_configs: + test_build(config) + +def test_build(configure_command): + print "\033[34m\033[47mTesting %s\033[0m" % (configure_command) + RunCommand(configure_command) + RunCommand("make clean") + RunCommand("make") + +if __name__ == "__main__": + main(sys.argv) diff --cc vp8/encoder/arm/armv5te/boolhuff_armv5te.asm index 000000000,a644a004c..4abe818f1 mode 000000,100644..100644 --- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm +++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm @@@ -1,0 -1,310 +1,310 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
+ ; + + + EXPORT |vp8_start_encode| + EXPORT |vp8_encode_bool| + EXPORT |vp8_stop_encode| + EXPORT |vp8_encode_value| + IMPORT |vp8_validate_buffer_arm| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE, READONLY + + ; macro for validating write buffer position + ; needs vp8_writer in r0 + ; start shall not be in r1 + MACRO + VALIDATE_POS $start, $pos + push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call + ldr r2, [r0, #vp8_writer_buffer_end] + ldr r3, [r0, #vp8_writer_error] + mov r1, $pos + mov r0, $start + bl vp8_validate_buffer_arm + pop {r0-r3, r12, lr} + MEND + + ; r0 BOOL_CODER *br + ; r1 unsigned char *source + ; r2 unsigned char *source_end + |vp8_start_encode| PROC + str r2, [r0, #vp8_writer_buffer_end] + mov r12, #0 + mov r3, #255 + mvn r2, #23 + str r12, [r0, #vp8_writer_lowvalue] + str r3, [r0, #vp8_writer_range] + str r2, [r0, #vp8_writer_count] + str r12, [r0, #vp8_writer_pos] + str r1, [r0, #vp8_writer_buffer] + bx lr + ENDP + + ; r0 BOOL_CODER *br + ; r1 int bit + ; r2 int probability + |vp8_encode_bool| PROC + push {r4-r10, lr} + + mov r4, r2 + + ldr r2, [r0, #vp8_writer_lowvalue] + ldr r5, [r0, #vp8_writer_range] + ldr r3, [r0, #vp8_writer_count] + + sub r7, r5, #1 ; range-1 + + cmp r1, #0 + mul r6, r4, r7 ; ((range-1) * probability) + + mov r7, #1 + add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8) + + addne r2, r2, r4 ; if (bit) lowvalue += split + subne r4, r5, r4 ; if (bit) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start + token_zero_while_loop + mov r9, #0 + strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r1, [r7, r4] + cmpge r1, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r9, [r7, r4] ; w->buffer[x] + add r9, r9, #1 + strb r9, [r7, r4] ; w->buffer[x] + 1 + token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r9, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r1, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r1, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r9, r1 ; validate_buffer at pos + + strb r7, [r9, r4] ; w->buffer[w->pos++] + + token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + str r2, [r0, #vp8_writer_lowvalue] + str r5, [r0, #vp8_writer_range] + str r3, [r0, #vp8_writer_count] + pop {r4-r10, pc} + ENDP + + ; r0 BOOL_CODER *br + |vp8_stop_encode| PROC + push {r4-r10, lr} + + ldr r2, [r0, #vp8_writer_lowvalue] + ldr r5, [r0, #vp8_writer_range] + ldr r3, [r0, #vp8_writer_count] + + mov r10, #32 + + stop_encode_loop + sub r7, r5, #1 ; range-1 + + mov r4, r7, lsl #7 ; ((range-1) * 128) + + mov r7, #1 + add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) + + ; Counting the leading zeros is used 
to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero_se ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set_se + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start_se + token_zero_while_loop_se + mov r9, #0 + strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + token_zero_while_start_se + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r1, [r7, r4] + cmpge r1, #0xff + beq token_zero_while_loop_se + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r9, [r7, r4] ; w->buffer[x] + add r9, r9, #1 + strb r9, [r7, r4] ; w->buffer[x] + 1 + token_high_bit_not_set_se + rsb r4, r6, #24 ; 24-offset + ldr r9, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r1, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r1, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r9, r1 ; validate_buffer at pos + + strb r7, [r9, r4] ; w->buffer[w->pos++] + + token_count_lt_zero_se + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r10, r10, #1 + bne stop_encode_loop + + str r2, [r0, #vp8_writer_lowvalue] + str r5, [r0, #vp8_writer_range] + str r3, [r0, #vp8_writer_count] + pop {r4-r10, pc} + + ENDP + + ; r0 BOOL_CODER *br + ; r1 int data + ; r2 int bits + |vp8_encode_value| PROC + push {r4-r12, lr} + + mov r10, r2 + + ldr r2, [r0, #vp8_writer_lowvalue] + ldr r5, [r0, #vp8_writer_range] + ldr r3, [r0, #vp8_writer_count] + + rsb r4, r10, #32 ; 32-n + + ; v is kept in r1 during the token pack loop + lsl r1, r1, r4 ; r1 = v << 32 - n + + encode_value_loop + sub r7, r5, #1 ; range-1 + + ; Decisions are made based on the bit value shifted + ; off of v, so set a flag here based on this. + ; This value is referred to as "bb" + lsls r1, r1, #1 ; bit = v >> n + mov r4, r7, lsl #7 ; ((range-1) * 128) + + mov r7, #1 + add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) + + addcs r2, r2, r4 ; if (bit) lowvalue += split + subcs r4, r5, r4 ; if (bit) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. 
This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero_ev ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set_ev + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start_ev + token_zero_while_loop_ev + mov r9, #0 + strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + token_zero_while_start_ev + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop_ev + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r9, [r7, r4] ; w->buffer[x] + add r9, r9, #1 + strb r9, [r7, r4] ; w->buffer[x] + 1 + token_high_bit_not_set_ev + rsb r4, r6, #24 ; 24-offset + ldr r9, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r9, r11 ; validate_buffer at pos + + strb r7, [r9, r4] ; w->buffer[w->pos++] + + token_count_lt_zero_ev + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r10, r10, #1 + bne encode_value_loop + + str r2, [r0, #vp8_writer_lowvalue] + str r5, [r0, #vp8_writer_range] + str r3, [r0, #vp8_writer_count] + pop {r4-r12, pc} + ENDP + + END diff --cc vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm index 000000000,a1cd46704..90a141c62 mode 000000,100644..100644 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm @@@ -1,0 -1,317 +1,317 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
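For readers following the register comments, the four boolhuff routines above are straight ports of the C boolean encoder. The per-bit step corresponds to roughly the following C sketch: field names mirror the vp8_writer_* offsets used in the assembly, __builtin_clz stands in for the clz instruction, and the VALIDATE_POS bounds check is elided. This is an illustration, not the upstream C.

    static void encode_bool_sketch(vp8_writer *w, int bit, int probability)
    {
        unsigned int split = 1 + (((w->range - 1) * probability) >> 8);
        int shift;

        if (bit) {
            w->lowvalue += split;                  /* addne/addcs path       */
            w->range    -= split;                  /* subne/subcs path       */
        } else {
            w->range = split;
        }

        shift      = __builtin_clz(w->range) - 24; /* clz r6, r4             */
        w->range <<= shift;
        w->count  += shift;

        if (w->count >= 0) {                       /* a whole byte is ready  */
            int offset = shift - w->count;

            if ((w->lowvalue << (offset - 1)) & 0x80000000) {
                int x = w->pos - 1;                /* propagate the carry    */
                while (x >= 0 && w->buffer[x] == 0xff) {
                    w->buffer[x] = 0;
                    x--;
                }
                w->buffer[x] += 1;
            }

            /* VALIDATE_POS checks pos against buffer_end here */
            w->buffer[w->pos++] = (w->lowvalue >> (24 - offset)) & 0xff;

            w->lowvalue <<= offset;
            shift         = w->count;
            w->lowvalue  &= 0xffffff;              /* bic ... #0xff000000    */
            w->count     -= 8;
        }

        w->lowvalue <<= shift;
    }

vp8_stop_encode is this step run 32 times with probability 128 (the lsl #7 of range-1), and vp8_encode_value feeds its argument MSB-first by pre-shifting v left by 32-n and peeling off one bit per iteration.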
+ ; + + + EXPORT |vp8cx_pack_tokens_armv5| + IMPORT |vp8_validate_buffer_arm| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE, READONLY + + + ; macro for validating write buffer position + ; needs vp8_writer in r0 + ; start shall not be in r1 + MACRO + VALIDATE_POS $start, $pos + push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call + ldr r2, [r0, #vp8_writer_buffer_end] + ldr r3, [r0, #vp8_writer_error] + mov r1, $pos + mov r0, $start + bl vp8_validate_buffer_arm + pop {r0-r3, r12, lr} + MEND + + + ; r0 vp8_writer *w + ; r1 const TOKENEXTRA *p + ; r2 int xcount + ; r3 vp8_coef_encodings + ; s0 vp8_extra_bits + ; s1 vp8_coef_tree + |vp8cx_pack_tokens_armv5| PROC + push {r4-r12, lr} + sub sp, sp, #16 + + ; Add size of xcount * sizeof (TOKENEXTRA) to get stop + ; sizeof (TOKENEXTRA) is 8 + add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA) + str r2, [sp, #0] + str r3, [sp, #8] ; save vp8_coef_encodings + ldr r2, [r0, #vp8_writer_lowvalue] + ldr r5, [r0, #vp8_writer_range] + ldr r3, [r0, #vp8_writer_count] + b check_p_lt_stop + + while_p_lt_stop + ldrb r6, [r1, #tokenextra_token] ; t + ldr r4, [sp, #8] ; vp8_coef_encodings + mov lr, #0 + add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t + ldr r9, [r1, #tokenextra_context_tree] ; pp + + ldrb r7, [r1, #tokenextra_skip_eob_node] + + ldr r6, [r4, #vp8_token_value] ; v + ldr r8, [r4, #vp8_token_len] ; n + + ; vp8 specific skip_eob_node + cmp r7, #0 + movne lr, #2 ; i = 2 + subne r8, r8, #1 ; --n + + rsb r4, r8, #32 ; 32-n + ldr r10, [sp, #60] ; vp8_coef_tree + + ; v is kept in r12 during the token pack loop + lsl r12, r6, r4 ; r12 = v << 32 - n + + ; loop start + token_loop + ldrb r4, [r9, lr, asr #1] ; pp [i>>1] + sub r7, r5, #1 ; range-1 + + ; Decisions are made based on the bit value shifted + ; off of v, so set a flag here based on this. + ; This value is referred to as "bb" + lsls r12, r12, #1 ; bb = v >> n + mul r6, r4, r7 ; ((range-1) * pp[i>>1]) + + ; bb can only be 0 or 1. So only execute this statement + ; if bb == 1, otherwise it will act like i + 0 + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] + add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. 
This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start + token_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 + token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r10, r11 ; validate_buffer at pos + + strb r7, [r10, r4] ; w->buffer[w->pos++] + + ; r10 is used earlier in the loop, but r10 is used as + ; temp variable here. So after r10 is used, reload + ; vp8_coef_tree_dcd into r10 + ldr r10, [sp, #60] ; vp8_coef_tree + + token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r8, r8, #1 ; --n + bne token_loop + + ldrb r6, [r1, #tokenextra_token] ; t + ldr r7, [sp, #56] ; vp8_extra_bits + ; Add t * sizeof (vp8_extra_bit_struct) to get the desired + ; element. Here vp8_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t + + ldr r4, [r12, #vp8_extra_bit_struct_base_val] + cmp r4, #0 + beq skip_extra_bits + + ; if( b->base_val) + ldr r8, [r12, #vp8_extra_bit_struct_len] ; L + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra + cmp r8, #0 ; if( L) + beq no_extra_bits + + ldr r9, [r12, #vp8_extra_bit_struct_prob] + asr r7, lr, #1 ; v=e>>1 + + ldr r10, [r12, #vp8_extra_bit_struct_tree] + str r10, [sp, #4] ; b->tree + + rsb r4, r8, #32 + lsl r12, r7, r4 + + mov lr, #0 ; i = 0 + + extra_bits_loop + ldrb r4, [r9, lr, asr #1] ; pp[i>>1] + sub r7, r5, #1 ; range-1 + lsls r12, r12, #1 ; v >> n + mul r6, r4, r7 ; (range-1) * pp[i>>1] + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = b->tree[i+bb] + add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + clz r6, r4 + sub r6, r6, #24 + + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi extra_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset= shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl extra_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos - 1 + b extra_zero_while_start + extra_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + extra_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq extra_zero_while_loop + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r10, [r7, r4] + add r10, r10, #1 + strb r10, [r7, r4] + extra_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; 
lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r10, r11 ; validate_buffer at pos + + strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) + ldr r10, [sp, #4] ; b->tree + extra_count_lt_zero + lsl r2, r2, r6 + + subs r8, r8, #1 ; --n + bne extra_bits_loop ; while (n) + + no_extra_bits + ldr lr, [r1, #4] ; e = p->Extra + add r4, r5, #1 ; range + 1 + tst lr, #1 + lsr r4, r4, #1 ; split = (range + 1) >> 1 + addne r2, r2, r4 ; lowvalue += split + subne r4, r5, r4 ; range = range-split + tst r2, #0x80000000 ; lowvalue & 0x80000000 + lsl r5, r4, #1 ; range <<= 1 + beq end_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] + mov r7, #0 + sub r4, r4, #1 + b end_zero_while_start + end_zero_while_loop + strb r7, [r6, r4] + sub r4, r4, #1 ; x-- + end_zero_while_start + cmp r4, #0 + ldrge r6, [r0, #vp8_writer_buffer] + ldrb r12, [r6, r4] + cmpge r12, #0xff + beq end_zero_while_loop + + ldr r6, [r0, #vp8_writer_buffer] + ldrb r7, [r6, r4] + add r7, r7, #1 + strb r7, [r6, r4] + end_high_bit_not_set + adds r3, r3, #1 ; ++count + lsl r2, r2, #1 ; lowvalue <<= 1 + bne end_count_zero + + ldr r4, [r0, #vp8_writer_pos] + mvn r3, #7 + ldr r7, [r0, #vp8_writer_buffer] + lsr r6, r2, #24 ; lowvalue >> 24 + add r12, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r12, [r0, #vp8_writer_pos] + + VALIDATE_POS r7, r12 ; validate_buffer at pos + + strb r6, [r7, r4] + end_count_zero + skip_extra_bits + add r1, r1, #TOKENEXTRA_SZ ; ++p + check_p_lt_stop + ldr r4, [sp, #0] ; stop + cmp r1, r4 ; while( p < stop) + bcc while_p_lt_stop + + str r2, [r0, #vp8_writer_lowvalue] + str r5, [r0, #vp8_writer_range] + str r3, [r0, #vp8_writer_count] + add sp, sp, #16 + pop {r4-r12, pc} + ENDP + + END diff --cc vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm index 000000000,1fa5e6c22..3a8d17a81 mode 000000,100644..100644 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm @@@ -1,0 -1,352 +1,352 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
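vp8cx_pack_tokens_armv5 above is the assembly counterpart of the C token packer. As a sketch of the control flow (struct member spellings approximate the tokenextra_*/vp8_token_*/vp8_extra_bit_struct offsets named in the comments; encode_bool_sketch() is the routine sketched earlier):

    static void pack_tokens_sketch(vp8_writer *w, const TOKENEXTRA *p,
                                   int xcount)
    {
        const TOKENEXTRA *stop = p + xcount;    /* sizeof(TOKENEXTRA) == 8 */

        while (p < stop) {
            const int t = p->Token;
            const vp8_token *a = vp8_coef_encodings + t;
            const vp8_extra_bit_struct *b = vp8_extra_bits + t;
            const vp8_prob *pp = p->context_tree;
            unsigned int v = a->value;
            int n = a->Len;
            int i = 0;

            if (p->skip_eob_node) {             /* vp8-specific: start at  */
                n--;                            /* node 2, below the EOB   */
                i = 2;
            }

            do {                                /* walk vp8_coef_tree,     */
                const int bb = (v >> --n) & 1;  /* one bit of v per node   */
                encode_bool_sketch(w, bb, pp[i >> 1]);
                i = vp8_coef_tree[i + bb];
            } while (n);

            if (b->base_val) {                  /* extra bits, then sign   */
                const int e = p->Extra;

                if (b->Len) {
                    unsigned int v2 = e >> 1;
                    int n2 = b->Len, i2 = 0;
                    do {
                        const int bb = (v2 >> --n2) & 1;
                        encode_bool_sketch(w, bb, b->prob[i2 >> 1]);
                        i2 = b->tree[i2 + bb];
                    } while (n2);
                }
                encode_bool_sketch(w, e & 1, 128);  /* sign bit, p = 1/2   */
            }
            ++p;
        }
    }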
+ ; + + + EXPORT |vp8cx_pack_mb_row_tokens_armv5| + IMPORT |vp8_validate_buffer_arm| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE, READONLY + + + ; macro for validating write buffer position + ; needs vp8_writer in r0 + ; start shall not be in r1 + MACRO + VALIDATE_POS $start, $pos + push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call + ldr r2, [r0, #vp8_writer_buffer_end] + ldr r3, [r0, #vp8_writer_error] + mov r1, $pos + mov r0, $start + bl vp8_validate_buffer_arm + pop {r0-r3, r12, lr} + MEND + + ; r0 VP8_COMP *cpi + ; r1 vp8_writer *w + ; r2 vp8_coef_encodings + ; r3 vp8_extra_bits + ; s0 vp8_coef_tree + + |vp8cx_pack_mb_row_tokens_armv5| PROC + push {r4-r12, lr} + sub sp, sp, #24 + + ; Compute address of cpi->common.mb_rows + ldr r4, _VP8_COMP_common_ + ldr r6, _VP8_COMMON_MBrows_ + add r4, r0, r4 + + ldr r5, [r4, r6] ; load up mb_rows + + str r2, [sp, #20] ; save vp8_coef_encodings + str r5, [sp, #12] ; save mb_rows + str r3, [sp, #8] ; save vp8_extra_bits + + ldr r4, _VP8_COMP_tplist_ + add r4, r0, r4 + ldr r7, [r4, #0] ; dereference cpi->tp_list + + mov r0, r1 ; keep same as other loops + + ldr r2, [r0, #vp8_writer_lowvalue] + ldr r5, [r0, #vp8_writer_range] + ldr r3, [r0, #vp8_writer_count] + + mb_row_loop + + ldr r1, [r7, #tokenlist_start] + ldr r9, [r7, #tokenlist_stop] + str r9, [sp, #0] ; save stop for later comparison + str r7, [sp, #16] ; tokenlist address for next time + + b check_p_lt_stop + + ; actual work gets done here! + + while_p_lt_stop + ldrb r6, [r1, #tokenextra_token] ; t + ldr r4, [sp, #20] ; vp8_coef_encodings + mov lr, #0 + add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t + ldr r9, [r1, #tokenextra_context_tree] ; pp + + ldrb r7, [r1, #tokenextra_skip_eob_node] + + ldr r6, [r4, #vp8_token_value] ; v + ldr r8, [r4, #vp8_token_len] ; n + + ; vp8 specific skip_eob_node + cmp r7, #0 + movne lr, #2 ; i = 2 + subne r8, r8, #1 ; --n + + rsb r4, r8, #32 ; 32-n + ldr r10, [sp, #64] ; vp8_coef_tree + + ; v is kept in r12 during the token pack loop + lsl r12, r6, r4 ; r12 = v << 32 - n + + ; loop start + token_loop + ldrb r4, [r9, lr, asr #1] ; pp [i>>1] + sub r7, r5, #1 ; range-1 + + ; Decisions are made based on the bit value shifted + ; off of v, so set a flag here based on this. + ; This value is referred to as "bb" + lsls r12, r12, #1 ; bb = v >> n + mul r6, r4, r7 ; ((range-1) * pp[i>>1]) + + ; bb can only be 0 or 1. So only execute this statement + ; if bb == 1, otherwise it will act like i + 0 + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] + add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. 
This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start + token_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 + token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r10, r11 ; validate_buffer at pos + + strb r7, [r10, r4] ; w->buffer[w->pos++] + + ; r10 is used earlier in the loop, but r10 is used as + ; temp variable here. So after r10 is used, reload + ; vp8_coef_tree_dcd into r10 + ldr r10, [sp, #64] ; vp8_coef_tree + + token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r8, r8, #1 ; --n + bne token_loop + + ldrb r6, [r1, #tokenextra_token] ; t + ldr r7, [sp, #8] ; vp8_extra_bits + ; Add t * sizeof (vp8_extra_bit_struct) to get the desired + ; element. Here vp8_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t + + ldr r4, [r12, #vp8_extra_bit_struct_base_val] + cmp r4, #0 + beq skip_extra_bits + + ; if( b->base_val) + ldr r8, [r12, #vp8_extra_bit_struct_len] ; L + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra + cmp r8, #0 ; if( L) + beq no_extra_bits + + ldr r9, [r12, #vp8_extra_bit_struct_prob] + asr r7, lr, #1 ; v=e>>1 + + ldr r10, [r12, #vp8_extra_bit_struct_tree] + str r10, [sp, #4] ; b->tree + + rsb r4, r8, #32 + lsl r12, r7, r4 + + mov lr, #0 ; i = 0 + + extra_bits_loop + ldrb r4, [r9, lr, asr #1] ; pp[i>>1] + sub r7, r5, #1 ; range-1 + lsls r12, r12, #1 ; v >> n + mul r6, r4, r7 ; (range-1) * pp[i>>1] + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = b->tree[i+bb] + add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + clz r6, r4 + sub r6, r6, #24 + + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi extra_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset= shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl extra_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos - 1 + b extra_zero_while_start + extra_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + extra_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq extra_zero_while_loop + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r10, [r7, r4] + add r10, r10, #1 + strb r10, [r7, r4] + extra_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; 
lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r10, r11 ; validate_buffer at pos + + strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) + ldr r10, [sp, #4] ; b->tree + extra_count_lt_zero + lsl r2, r2, r6 + + subs r8, r8, #1 ; --n + bne extra_bits_loop ; while (n) + + no_extra_bits + ldr lr, [r1, #4] ; e = p->Extra + add r4, r5, #1 ; range + 1 + tst lr, #1 + lsr r4, r4, #1 ; split = (range + 1) >> 1 + addne r2, r2, r4 ; lowvalue += split + subne r4, r5, r4 ; range = range-split + tst r2, #0x80000000 ; lowvalue & 0x80000000 + lsl r5, r4, #1 ; range <<= 1 + beq end_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] + mov r7, #0 + sub r4, r4, #1 + b end_zero_while_start + end_zero_while_loop + strb r7, [r6, r4] + sub r4, r4, #1 ; x-- + end_zero_while_start + cmp r4, #0 + ldrge r6, [r0, #vp8_writer_buffer] + ldrb r12, [r6, r4] + cmpge r12, #0xff + beq end_zero_while_loop + + ldr r6, [r0, #vp8_writer_buffer] + ldrb r7, [r6, r4] + add r7, r7, #1 + strb r7, [r6, r4] + end_high_bit_not_set + adds r3, r3, #1 ; ++count + lsl r2, r2, #1 ; lowvalue <<= 1 + bne end_count_zero + + ldr r4, [r0, #vp8_writer_pos] + mvn r3, #7 + ldr r7, [r0, #vp8_writer_buffer] + lsr r6, r2, #24 ; lowvalue >> 24 + add r12, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r12, [r0, #vp8_writer_pos] + + VALIDATE_POS r7, r12 ; validate_buffer at pos + + strb r6, [r7, r4] + end_count_zero + skip_extra_bits + add r1, r1, #TOKENEXTRA_SZ ; ++p + check_p_lt_stop + ldr r4, [sp, #0] ; stop + cmp r1, r4 ; while( p < stop) + bcc while_p_lt_stop + + ldr r6, [sp, #12] ; mb_rows + ldr r7, [sp, #16] ; tokenlist address + subs r6, r6, #1 + add r7, r7, #TOKENLIST_SZ ; next element in the array + str r6, [sp, #12] + bne mb_row_loop + + str r2, [r0, #vp8_writer_lowvalue] + str r5, [r0, #vp8_writer_range] + str r3, [r0, #vp8_writer_count] + add sp, sp, #24 + pop {r4-r12, pc} + ENDP + + _VP8_COMP_common_ + DCD vp8_comp_common + _VP8_COMMON_MBrows_ + DCD vp8_common_mb_rows + _VP8_COMP_tplist_ + DCD vp8_comp_tplist + + END diff --cc vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm index 000000000,90a98fe8d..e9aa4958f mode 000000,100644..100644 --- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm @@@ -1,0 -1,471 +1,471 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
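vp8cx_pack_mb_row_tokens_armv5 wraps the same inner loop in a pass over cpi->tplist, one entry per macroblock row. In C, approximately (a sketch; the tplist member names follow the tokenlist_start/tokenlist_stop offsets above):

    static void pack_mb_row_tokens_sketch(VP8_COMP *cpi, vp8_writer *w)
    {
        int mb_row;

        for (mb_row = 0; mb_row < cpi->common.mb_rows; mb_row++) {
            const TOKENEXTRA *p    = cpi->tplist[mb_row].start;
            const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;

            pack_tokens_sketch(w, p, (int)(stop - p));
        }
    }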
+ ; + + + EXPORT |vp8cx_pack_tokens_into_partitions_armv5| + IMPORT |vp8_validate_buffer_arm| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE, READONLY + + ; macro for validating write buffer position + ; needs vp8_writer in r0 + ; start shall not be in r1 + MACRO + VALIDATE_POS $start, $pos + push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call + ldr r2, [r0, #vp8_writer_buffer_end] + ldr r3, [r0, #vp8_writer_error] + mov r1, $pos + mov r0, $start + bl vp8_validate_buffer_arm + pop {r0-r3, r12, lr} + MEND + + ; r0 VP8_COMP *cpi + ; r1 unsigned char *cx_data + ; r2 const unsigned char *cx_data_end + ; r3 int num_part + ; s0 vp8_coef_encodings + ; s1 vp8_extra_bits, + ; s2 const vp8_tree_index * + + |vp8cx_pack_tokens_into_partitions_armv5| PROC + push {r4-r12, lr} + sub sp, sp, #40 + + ; Compute address of cpi->common.mb_rows + ldr r4, _VP8_COMP_common_ + ldr r6, _VP8_COMMON_MBrows_ + add r4, r0, r4 + + ldr r5, [r4, r6] ; load up mb_rows + + str r5, [sp, #36] ; save mb_rows + str r1, [sp, #24] ; save ptr = cx_data + str r3, [sp, #20] ; save num_part + str r2, [sp, #8] ; save cx_data_end + + ldr r4, _VP8_COMP_tplist_ + add r4, r0, r4 + ldr r7, [r4, #0] ; dereference cpi->tp_list + str r7, [sp, #32] ; store start of cpi->tp_list + + ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi + add r0, r0, r11 + + mov r11, #0 + str r11, [sp, #28] ; i + + numparts_loop + ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer) + add r0, r2 ; bc[i + 1] + + ldr r10, [sp, #24] ; ptr + ldr r5, [sp, #36] ; move mb_rows to the counting section + subs r5, r5, r11 ; move start point with each partition + ; mb_rows starts at i + str r5, [sp, #12] + + ; Reset all of the VP8 Writer data for each partition that + ; is processed. + ; start_encode + + ldr r3, [sp, #8] + str r3, [r0, #vp8_writer_buffer_end] + + mov r2, #0 ; vp8_writer_lowvalue + mov r5, #255 ; vp8_writer_range + mvn r3, #23 ; vp8_writer_count + + str r2, [r0, #vp8_writer_pos] + str r10, [r0, #vp8_writer_buffer] + + ble end_partition ; if (mb_rows <= 0) end partition + + mb_row_loop + + ldr r1, [r7, #tokenlist_start] + ldr r9, [r7, #tokenlist_stop] + str r9, [sp, #0] ; save stop for later comparison + str r7, [sp, #16] ; tokenlist address for next time + + b check_p_lt_stop + + ; actual work gets done here! + + while_p_lt_stop + ldrb r6, [r1, #tokenextra_token] ; t + ldr r4, [sp, #80] ; vp8_coef_encodings + mov lr, #0 + add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t + ldr r9, [r1, #tokenextra_context_tree] ; pp + + ldrb r7, [r1, #tokenextra_skip_eob_node] + + ldr r6, [r4, #vp8_token_value] ; v + ldr r8, [r4, #vp8_token_len] ; n + + ; vp8 specific skip_eob_node + cmp r7, #0 + movne lr, #2 ; i = 2 + subne r8, r8, #1 ; --n + + rsb r4, r8, #32 ; 32-n + ldr r10, [sp, #88] ; vp8_coef_tree + + ; v is kept in r12 during the token pack loop + lsl r12, r6, r4 ; r12 = v << 32 - n + + ; loop start + token_loop + ldrb r4, [r9, lr, asr #1] ; pp [i>>1] + sub r7, r5, #1 ; range-1 + + ; Decisions are made based on the bit value shifted + ; off of v, so set a flag here based on this. + ; This value is referred to as "bb" + lsls r12, r12, #1 ; bb = v >> n + mul r6, r4, r7 ; ((range-1) * pp[i>>1]) + + ; bb can only be 0 or 1. 
So only execute this statement + ; if bb == 1, otherwise it will act like i + 0 + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] + add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start + token_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 + token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r10, r11 ; validate_buffer at pos + + strb r7, [r10, r4] ; w->buffer[w->pos++] + + ; r10 is used earlier in the loop, but r10 is used as + ; temp variable here. So after r10 is used, reload + ; vp8_coef_tree_dcd into r10 + ldr r10, [sp, #88] ; vp8_coef_tree + + token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r8, r8, #1 ; --n + bne token_loop + + ldrb r6, [r1, #tokenextra_token] ; t + ldr r7, [sp, #84] ; vp8_extra_bits + ; Add t * sizeof (vp8_extra_bit_struct) to get the desired + ; element. 
Here vp8_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t + + ldr r4, [r12, #vp8_extra_bit_struct_base_val] + cmp r4, #0 + beq skip_extra_bits + + ; if( b->base_val) + ldr r8, [r12, #vp8_extra_bit_struct_len] ; L + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra + cmp r8, #0 ; if( L) + beq no_extra_bits + + ldr r9, [r12, #vp8_extra_bit_struct_prob] + asr r7, lr, #1 ; v=e>>1 + + ldr r10, [r12, #vp8_extra_bit_struct_tree] + str r10, [sp, #4] ; b->tree + + rsb r4, r8, #32 + lsl r12, r7, r4 + + mov lr, #0 ; i = 0 + + extra_bits_loop + ldrb r4, [r9, lr, asr #1] ; pp[i>>1] + sub r7, r5, #1 ; range-1 + lsls r12, r12, #1 ; v >> n + mul r6, r4, r7 ; (range-1) * pp[i>>1] + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = b->tree[i+bb] + add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + clz r6, r4 + sub r6, r6, #24 + + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi extra_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset= shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl extra_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos - 1 + b extra_zero_while_start + extra_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + extra_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq extra_zero_while_loop + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r10, [r7, r4] + add r10, r10, #1 + strb r10, [r7, r4] + extra_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r10, r11 ; validate_buffer at pos + + strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) + ldr r10, [sp, #4] ; b->tree + extra_count_lt_zero + lsl r2, r2, r6 + + subs r8, r8, #1 ; --n + bne extra_bits_loop ; while (n) + + no_extra_bits + ldr lr, [r1, #4] ; e = p->Extra + add r4, r5, #1 ; range + 1 + tst lr, #1 + lsr r4, r4, #1 ; split = (range + 1) >> 1 + addne r2, r2, r4 ; lowvalue += split + subne r4, r5, r4 ; range = range-split + tst r2, #0x80000000 ; lowvalue & 0x80000000 + lsl r5, r4, #1 ; range <<= 1 + beq end_high_bit_not_set + + ldr r4, [r0, #vp8_writer_pos] + mov r7, #0 + sub r4, r4, #1 + b end_zero_while_start + end_zero_while_loop + strb r7, [r6, r4] + sub r4, r4, #1 ; x-- + end_zero_while_start + cmp r4, #0 + ldrge r6, [r0, #vp8_writer_buffer] + ldrb r12, [r6, r4] + cmpge r12, #0xff + beq end_zero_while_loop + + ldr r6, [r0, #vp8_writer_buffer] + ldrb r7, [r6, r4] + add r7, r7, #1 + strb r7, [r6, r4] + end_high_bit_not_set + adds r3, r3, #1 ; ++count + lsl r2, r2, #1 ; lowvalue <<= 1 + bne end_count_zero + + ldr r4, [r0, #vp8_writer_pos] + mvn r3, #7 ; count = -8 + ldr r7, [r0, #vp8_writer_buffer] + lsr r6, r2, #24 ; lowvalue >> 24 + add r12, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r12, [r0, #vp8_writer_pos] + + VALIDATE_POS r7, r12 ; validate_buffer at pos + + strb r6, [r7, r4] + end_count_zero + skip_extra_bits + add r1, r1, #TOKENEXTRA_SZ ; ++p + check_p_lt_stop 
+ ldr r4, [sp, #0] ; stop + cmp r1, r4 ; while( p < stop) + bcc while_p_lt_stop + + ldr r10, [sp, #20] ; num_parts + mov r1, #TOKENLIST_SZ + mul r1, r10, r1 + + ldr r6, [sp, #12] ; mb_rows + ldr r7, [sp, #16] ; tokenlist address + subs r6, r6, r10 + add r7, r7, r1 ; next element in the array + str r6, [sp, #12] + bgt mb_row_loop + + end_partition + mov r12, #32 + + stop_encode_loop + sub r7, r5, #1 ; range-1 + + mov r4, r7, lsl #7 ; ((range-1) * 128) + + mov r7, #1 + add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero_se ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set_se + + ldr r4, [r0, #vp8_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start_se + token_zero_while_loop_se + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- + token_zero_while_start_se + cmp r4, #0 + ldrge r7, [r0, #vp8_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop_se + + ldr r7, [r0, #vp8_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 + token_high_bit_not_set_se + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp8_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp8_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp8_writer_pos] + sub r3, r3, #8 ; count -= 8 + + VALIDATE_POS r10, r11 ; validate_buffer at pos + + strb r7, [r10, r4] ; w->buffer[w->pos++] + + token_count_lt_zero_se + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r12, r12, #1 + bne stop_encode_loop + + ldr r4, [r0, #vp8_writer_pos] ; w->pos + ldr r12, [sp, #24] ; ptr + add r12, r12, r4 ; ptr += w->pos + str r12, [sp, #24] + + ldr r11, [sp, #28] ; i + ldr r10, [sp, #20] ; num_parts + + add r11, r11, #1 ; i++ + str r11, [sp, #28] + + ldr r7, [sp, #32] ; cpi->tp_list[i] + mov r1, #TOKENLIST_SZ + add r7, r7, r1 ; next element in cpi->tp_list + str r7, [sp, #32] ; cpi->tp_list[i+1] + + cmp r10, r11 + bgt numparts_loop + + add sp, sp, #40 + pop {r4-r12, pc} + ENDP + + _VP8_COMP_common_ + DCD vp8_comp_common + _VP8_COMMON_MBrows_ + DCD vp8_common_mb_rows + _VP8_COMP_tplist_ + DCD vp8_comp_tplist + _VP8_COMP_bc_ + DCD vp8_comp_bc + _vp8_writer_sz_ + DCD vp8_writer_sz + + END diff --cc vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm index 000000000,d61f5d94d..de35a1e13 mode 000000,100644..100644 --- a/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm +++ b/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm @@@ -1,0 -1,225 +1,225 @@@ + ; + ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
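vp8cx_pack_tokens_into_partitions_armv5 combines the two previous routines: writer i+1 is re-initialized in place (the inlined start_encode above: lowvalue 0, range 255, count -24), rows i, i+num_part, i+2*num_part, ... are packed into it, and the partition is closed with 32 stop bits before the output pointer advances by w->pos. A sketch, assuming cpi->bc is the writer array that the _VP8_COMP_bc_ and _vp8_writer_sz_ constants index:

    static void pack_partitions_sketch(VP8_COMP *cpi, unsigned char *cx_data,
                                       unsigned char *cx_data_end, int num_part)
    {
        unsigned char *ptr = cx_data;
        int i;

        for (i = 0; i < num_part; i++) {
            vp8_writer *w = &cpi->bc[i + 1];
            int mb_row;

            vp8_start_encode(w, ptr, cx_data_end);
            for (mb_row = i; mb_row < cpi->common.mb_rows;
                 mb_row += num_part) {
                const TOKENEXTRA *p    = cpi->tplist[mb_row].start;
                const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;

                pack_tokens_sketch(w, p, (int)(stop - p));
            }
            vp8_stop_encode(w);              /* the 32-iteration flush loop */
            ptr += w->pos;
        }
    }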
+ ; + + + EXPORT |vp8_fast_quantize_b_armv6| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + + ; r0 BLOCK *b + ; r1 BLOCKD *d + |vp8_fast_quantize_b_armv6| PROC + stmfd sp!, {r1, r4-r11, lr} + + ldr r3, [r0, #vp8_block_coeff] ; coeff + ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast + ldr r5, [r0, #vp8_block_round] ; round + ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff + ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff + ldr r8, [r1, #vp8_blockd_dequant] ; dequant + + ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction + ; is used to update the counter so that + ; it can be used to mark nonzero + ; quantized coefficient pairs. + + mov r1, #0 ; flags for quantized coeffs + + ; PART 1: quantization and dequantization loop + loop + ldr r9, [r3], #4 ; [z1 | z0] + ldr r10, [r5], #4 ; [r1 | r0] + ldr r11, [r4], #4 ; [q1 | q0] + + ssat16 lr, #1, r9 ; [sz1 | sz0] + eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0] + ssub16 r9, r9, lr ; x = (z ^ sz) - sz + sadd16 r9, r9, r10 ; [x1+r1 | x0+r0] + + ldr r12, [r3], #4 ; [z3 | z2] + + smulbb r0, r9, r11 ; [(x0+r0)*q0] + smultt r9, r9, r11 ; [(x1+r1)*q1] + + ldr r10, [r5], #4 ; [r3 | r2] + + ssat16 r11, #1, r12 ; [sz3 | sz2] + eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2] + pkhtb r0, r9, r0, asr #16 ; [y1 | y0] + ldr r9, [r4], #4 ; [q3 | q2] + ssub16 r12, r12, r11 ; x = (z ^ sz) - sz + + sadd16 r12, r12, r10 ; [x3+r3 | x2+r2] + + eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)] + + smulbb r10, r12, r9 ; [(x2+r2)*q2] + smultt r12, r12, r9 ; [(x3+r3)*q3] + + ssub16 r0, r0, lr ; x = (y ^ sz) - sz + + cmp r0, #0 ; check if zero + orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs + + str r0, [r6], #4 ; *qcoeff++ = x + ldr r9, [r8], #4 ; [dq1 | dq0] + + pkhtb r10, r12, r10, asr #16 ; [y3 | y2] + eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)] + ssub16 r10, r10, r11 ; x = (y ^ sz) - sz + + cmp r10, #0 ; check if zero + orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs + + str r10, [r6], #4 ; *qcoeff++ = x + ldr r11, [r8], #4 ; [dq3 | dq2] + + smulbb r12, r0, r9 ; [x0*dq0] + smultt r0, r0, r9 ; [x1*dq1] + + smulbb r9, r10, r11 ; [x2*dq2] + smultt r10, r10, r11 ; [x3*dq3] + + lsls r2, r2, #2 ; update loop counter + strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] + strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1] + strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2] + strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3] + add r7, r7, #8 ; dqcoeff += 8 + bne loop + + ; PART 2: check position for eob... + ldr r11, [sp, #0] ; restore BLOCKD pointer + mov lr, #0 ; init eob + cmp r1, #0 ; coeffs after quantization? 
+ ldr r12, [r11, #vp8_blockd_eob] + beq end ; skip eob calculations if all zero + + ldr r0, [r11, #vp8_blockd_qcoeff] + + ; check shortcut for nonzero qcoeffs + tst r1, #0x80 + bne quant_coeff_15_14 + tst r1, #0x20 + bne quant_coeff_13_11 + tst r1, #0x8 + bne quant_coeff_12_7 + tst r1, #0x40 + bne quant_coeff_10_9 + tst r1, #0x10 + bne quant_coeff_8_3 + tst r1, #0x2 + bne quant_coeff_6_5 + tst r1, #0x4 + bne quant_coeff_4_2 + b quant_coeff_1_0 + + quant_coeff_15_14 + ldrh r2, [r0, #30] ; rc=15, i=15 + mov lr, #16 + cmp r2, #0 + bne end + + ldrh r3, [r0, #28] ; rc=14, i=14 + mov lr, #15 + cmp r3, #0 + bne end + + quant_coeff_13_11 + ldrh r2, [r0, #22] ; rc=11, i=13 + mov lr, #14 + cmp r2, #0 + bne end + + quant_coeff_12_7 + ldrh r3, [r0, #14] ; rc=7, i=12 + mov lr, #13 + cmp r3, #0 + bne end + + ldrh r2, [r0, #20] ; rc=10, i=11 + mov lr, #12 + cmp r2, #0 + bne end + + quant_coeff_10_9 + ldrh r3, [r0, #26] ; rc=13, i=10 + mov lr, #11 + cmp r3, #0 + bne end + + ldrh r2, [r0, #24] ; rc=12, i=9 + mov lr, #10 + cmp r2, #0 + bne end + + quant_coeff_8_3 + ldrh r3, [r0, #18] ; rc=9, i=8 + mov lr, #9 + cmp r3, #0 + bne end + + ldrh r2, [r0, #12] ; rc=6, i=7 + mov lr, #8 + cmp r2, #0 + bne end + + quant_coeff_6_5 + ldrh r3, [r0, #6] ; rc=3, i=6 + mov lr, #7 + cmp r3, #0 + bne end + + ldrh r2, [r0, #4] ; rc=2, i=5 + mov lr, #6 + cmp r2, #0 + bne end + + quant_coeff_4_2 + ldrh r3, [r0, #10] ; rc=5, i=4 + mov lr, #5 + cmp r3, #0 + bne end + + ldrh r2, [r0, #16] ; rc=8, i=3 + mov lr, #4 + cmp r2, #0 + bne end + + ldrh r3, [r0, #8] ; rc=4, i=2 + mov lr, #3 + cmp r3, #0 + bne end + + quant_coeff_1_0 + ldrh r2, [r0, #2] ; rc=1, i=1 + mov lr, #2 + cmp r2, #0 + bne end + + mov lr, #1 ; rc=0, i=0 + + end + strb lr, [r12] + ldmfd sp!, {r1, r4-r11, pc} + + ENDP + + loop_count + DCD 0x1000000 + + END + diff --cc vp8/encoder/arm/armv6/vp8_subtract_armv6.asm index 000000000,f329f8f73..05746cf7f mode 000000,100644..100644 --- a/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm +++ b/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm @@@ -1,0 -1,272 +1,272 @@@ + ; + ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
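For reference, vp8_fast_quantize_b_armv6 above implements the following C (a sketch: the assembly quantizes in raster order four coefficients at a time, then uses the nonzero-pair flags accumulated in r1 to shortcut the end-of-block scan):

    static void fast_quantize_b_sketch(BLOCK *b, BLOCKD *d)
    {
        /* vp8_default_zig_zag1d: scan position -> raster index */
        static const int zig_zag[16] = { 0, 1,  4,  8,  5, 2,  3,  6,
                                         9, 12, 13, 10, 7, 11, 14, 15 };
        int i, eob = 0;

        for (i = 0; i < 16; i++) {
            const int rc = zig_zag[i];
            const int z  = b->coeff[rc];
            const int sz = z >> 31;                       /* 0 or -1       */
            int x = (z ^ sz) - sz;                        /* abs(z)        */
            const int y = ((x + b->round[rc]) * b->quant_fast[rc]) >> 16;

            x = (y ^ sz) - sz;                            /* restore sign  */
            d->qcoeff[rc]  = (short)x;
            d->dqcoeff[rc] = (short)(x * d->dequant[rc]);
            if (y)
                eob = i + 1;           /* 1-based, as the strb stores it   */
        }
        *d->eob = (char)eob;
    }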
+ ; + + + EXPORT |vp8_subtract_mby_armv6| + EXPORT |vp8_subtract_mbuv_armv6| + EXPORT |vp8_subtract_b_armv6| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + + ; r0 BLOCK *be + ; r1 BLOCKD *bd + ; r2 int pitch + |vp8_subtract_b_armv6| PROC + + stmfd sp!, {r4-r9} + + ldr r4, [r0, #vp8_block_base_src] + ldr r5, [r0, #vp8_block_src] + ldr r6, [r0, #vp8_block_src_diff] + + ldr r3, [r4] + ldr r7, [r0, #vp8_block_src_stride] + add r3, r3, r5 ; src = *base_src + src + ldr r8, [r1, #vp8_blockd_predictor] + + mov r9, #4 ; loop count + + loop_block + + ldr r0, [r3], r7 ; src + ldr r1, [r8], r2 ; pred + + uxtb16 r4, r0 ; [s2 | s0] + uxtb16 r5, r1 ; [p2 | p0] + uxtb16 r0, r0, ror #8 ; [s3 | s1] + uxtb16 r1, r1, ror #8 ; [p3 | p1] + + usub16 r4, r4, r5 ; [d2 | d0] + usub16 r5, r0, r1 ; [d3 | d1] + + subs r9, r9, #1 ; decrement loop counter + + pkhbt r0, r4, r5, lsl #16 ; [d1 | d0] + pkhtb r1, r5, r4, asr #16 ; [d3 | d2] + + str r0, [r6, #0] ; diff + str r1, [r6, #4] ; diff + + add r6, r6, r2, lsl #1 ; update diff pointer + bne loop_block + + ldmfd sp!, {r4-r9} + mov pc, lr + + ENDP + + + ; r0 short *diff + ; r1 unsigned char *usrc + ; r2 unsigned char *vsrc + ; r3 int src_stride + ; sp unsigned char *upred + ; sp unsigned char *vpred + ; sp int pred_stride + |vp8_subtract_mbuv_armv6| PROC + + stmfd sp!, {r4-r11} + + add r0, r0, #512 ; set *diff point to Cb + mov r4, #8 ; loop count + ldr r5, [sp, #32] ; upred + ldr r12, [sp, #40] ; pred_stride + + ; Subtract U block + loop_u + ldr r6, [r1] ; usrc (A) + ldr r7, [r5] ; upred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r1, #4] ; usrc (B) + ldr r11, [r5, #4] ; upred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + add r1, r1, r3 ; update usrc pointer + add r5, r5, r12 ; update upred pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (B) + + bne loop_u + + ldr r5, [sp, #36] ; vpred + mov r4, #8 ; loop count + + ; Subtract V block + loop_v + ldr r6, [r2] ; vsrc (A) + ldr r7, [r5] ; vpred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r2, #4] ; vsrc (B) + ldr r11, [r5, #4] ; vpred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + add r2, r2, r3 ; update vsrc pointer + add r5, r5, r12 ; update vpred pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr 
#16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (B) + + bne loop_v + + ldmfd sp!, {r4-r11} + bx lr + + ENDP + + + ; r0 short *diff + ; r1 unsigned char *src + ; r2 int src_stride + ; r3 unsigned char *pred + ; sp int pred_stride + |vp8_subtract_mby_armv6| PROC + + stmfd sp!, {r4-r11} + ldr r12, [sp, #32] ; pred_stride + mov r4, #16 + loop + ldr r6, [r1] ; src (A) + ldr r7, [r3] ; pred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r1, #4] ; src (B) + ldr r11, [r3, #4] ; pred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + ldr r10, [r1, #8] ; src (C) + ldr r11, [r3, #8] ; pred (C) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + uxtb16 r8, r10 ; [s2 | s0] (C) + str r9, [r0], #4 ; diff (B) + + uxtb16 r9, r11 ; [p2 | p0] (C) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (C) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (C) + + usub16 r6, r8, r9 ; [d2 | d0] (C) + usub16 r7, r10, r11 ; [d3 | d1] (C) + + ldr r10, [r1, #12] ; src (D) + ldr r11, [r3, #12] ; pred (D) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) + + str r8, [r0], #4 ; diff (C) + uxtb16 r8, r10 ; [s2 | s0] (D) + str r9, [r0], #4 ; diff (C) + + uxtb16 r9, r11 ; [p2 | p0] (D) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (D) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (D) + + usub16 r6, r8, r9 ; [d2 | d0] (D) + usub16 r7, r10, r11 ; [d3 | d1] (D) + + add r1, r1, r2 ; update src pointer + add r3, r3, r12 ; update pred pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D) + + str r8, [r0], #4 ; diff (D) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (D) + + bne loop + + ldmfd sp!, {r4-r11} + bx lr + + ENDP + + END + diff --cc vp8/encoder/arm/neon/fastquantizeb_neon.asm index 000000000,143058842..9374310e5 mode 000000,100644..100644 --- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm +++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm @@@ -1,0 -1,258 +1,258 @@@ + ; + ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
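The vp8_subtract_* routines above compute the residual, source minus prediction, into the diff buffer; the 4x4 block case is, in C (a sketch matching the register comments):

    static void subtract_b_sketch(BLOCK *be, BLOCKD *bd, int pitch)
    {
        unsigned char *src  = *be->base_src + be->src;  /* src = *base_src + src */
        unsigned char *pred = bd->predictor;
        short *diff = be->src_diff;
        int r, c;

        for (r = 0; r < 4; r++) {
            for (c = 0; c < 4; c++)
                diff[c] = (short)(src[c] - pred[c]);
            diff += pitch;              /* the 'add r6, r6, r2, lsl #1' above */
            pred += pitch;
            src  += be->src_stride;
        }
    }

The mby and mbuv variants are the 16x16 luma and paired 8x8 chroma analogues; mbuv starts writing at diff + 256 (the '#512' byte offset above, counted in shorts).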
+ ; + + + EXPORT |vp8_fast_quantize_b_neon| + EXPORT |vp8_fast_quantize_b_pair_neon| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=4 + + ;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2); + |vp8_fast_quantize_b_pair_neon| PROC + + stmfd sp!, {r4-r9} + vstmdb sp!, {q4-q7} + + ldr r4, [r0, #vp8_block_coeff] + ldr r5, [r0, #vp8_block_quant_fast] + ldr r6, [r0, #vp8_block_round] + + vld1.16 {q0, q1}, [r4@128] ; load z + + ldr r7, [r2, #vp8_blockd_qcoeff] + + vabs.s16 q4, q0 ; calculate x = abs(z) + vabs.s16 q5, q1 + + ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative + vshr.s16 q2, q0, #15 ; sz + vshr.s16 q3, q1, #15 + + vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15] + vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15] + + ldr r4, [r1, #vp8_block_coeff] + + vadd.s16 q4, q6 ; x + Round + vadd.s16 q5, q7 + + vld1.16 {q0, q1}, [r4@128] ; load z2 + + vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16 + vqdmulh.s16 q5, q9 + + vabs.s16 q10, q0 ; calculate x2 = abs(z_2) + vabs.s16 q11, q1 + vshr.s16 q12, q0, #15 ; sz2 + vshr.s16 q13, q1, #15 + + ;modify data to have its original sign + veor.s16 q4, q2 ; y^sz + veor.s16 q5, q3 + + vadd.s16 q10, q6 ; x2 + Round + vadd.s16 q11, q7 + + ldr r8, [r2, #vp8_blockd_dequant] + + vqdmulh.s16 q10, q8 ; y2 = ((Round+abs(z)) * Quant) >> 16 + vqdmulh.s16 q11, q9 + + vshr.s16 q4, #1 ; right shift 1 after vqdmulh + vshr.s16 q5, #1 + + vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i] + + vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) + vsub.s16 q5, q3 + + vshr.s16 q10, #1 ; right shift 1 after vqdmulh + vshr.s16 q11, #1 + + ldr r9, [r2, #vp8_blockd_dqcoeff] + + veor.s16 q10, q12 ; y2^sz2 + veor.s16 q11, q13 + + vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1 + + + vsub.s16 q10, q12 ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement) + vsub.s16 q11, q13 + + ldr r6, [r3, #vp8_blockd_qcoeff] + + vmul.s16 q2, q6, q4 ; x * Dequant + vmul.s16 q3, q7, q5 + + adr r0, inv_zig_zag ; load ptr of inverse zigzag table + + vceq.s16 q8, q8 ; set q8 to all 1 + + vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2 + + vmul.s16 q12, q6, q10 ; x2 * Dequant + vmul.s16 q13, q7, q11 + + vld1.16 {q6, q7}, [r0@128] ; load inverse scan order + + vtst.16 q14, q4, q8 ; now find eob + vtst.16 q15, q5, q8 ; non-zero element is set to all 1 + + vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant + + ldr r7, [r3, #vp8_blockd_dqcoeff] + + vand q0, q6, q14 ; get all valid numbers from scan array + vand q1, q7, q15 + + vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x * Dequant + + vtst.16 q2, q10, q8 ; now find eob + vtst.16 q3, q11, q8 ; non-zero element is set to all 1 + + vmax.u16 q0, q0, q1 ; find maximum value in q0, q1 + + vand q10, q6, q2 ; get all valid numbers from scan array + vand q11, q7, q3 + vmax.u16 q10, q10, q11 ; find maximum value in q10, q11 + + vmax.u16 d0, d0, d1 + vmax.u16 d20, d20, d21 + vmovl.u16 q0, d0 + vmovl.u16 q10, d20 + + vmax.u32 d0, d0, d1 + vmax.u32 d20, d20, d21 + vpmax.u32 d0, d0, d0 + vpmax.u32 d20, d20, d20 + + ldr r4, [r2, #vp8_blockd_eob] + ldr r5, [r3, #vp8_blockd_eob] + + vst1.8 {d0[0]}, [r4] ; store eob + vst1.8 {d20[0]}, [r5] ; store eob + + vldmia sp!, {q4-q7} + ldmfd sp!, {r4-r9} + bx lr + + ENDP + + ;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) + |vp8_fast_quantize_b_neon| PROC + + stmfd sp!, {r4-r7} + + ldr r3, [r0, #vp8_block_coeff] + ldr r4, [r0, #vp8_block_quant_fast] + ldr r5, [r0, 
#vp8_block_round] + + vld1.16 {q0, q1}, [r3@128] ; load z + vorr.s16 q14, q0, q1 ; check if all zero (step 1) + ldr r6, [r1, #vp8_blockd_qcoeff] + ldr r7, [r1, #vp8_blockd_dqcoeff] + vorr.s16 d28, d28, d29 ; check if all zero (step 2) + + vabs.s16 q12, q0 ; calculate x = abs(z) + vabs.s16 q13, q1 + + ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative + vshr.s16 q2, q0, #15 ; sz + vmov r2, r3, d28 ; check if all zero (step 3) + vshr.s16 q3, q1, #15 + + vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15] + vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15] + + vadd.s16 q12, q14 ; x + Round + vadd.s16 q13, q15 + + adr r0, inv_zig_zag ; load ptr of inverse zigzag table + + vqdmulh.s16 q12, q8 ; y = ((Round+abs(z)) * Quant) >> 16 + vqdmulh.s16 q13, q9 + + vld1.16 {q10, q11}, [r0@128]; load inverse scan order + + vceq.s16 q8, q8 ; set q8 to all 1 + + ldr r4, [r1, #vp8_blockd_dequant] + + vshr.s16 q12, #1 ; right shift 1 after vqdmulh + vshr.s16 q13, #1 + + ldr r5, [r1, #vp8_blockd_eob] + + orr r2, r2, r3 ; check if all zero (step 4) + cmp r2, #0 ; check if all zero (step 5) + beq zero_output ; check if all zero (step 6) + + ;modify data to have its original sign + veor.s16 q12, q2 ; y^sz + veor.s16 q13, q3 + + vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) + vsub.s16 q13, q3 + + vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i] + + vtst.16 q14, q12, q8 ; now find eob + vtst.16 q15, q13, q8 ; non-zero element is set to all 1 + + vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1 + + vand q10, q10, q14 ; get all valid numbers from scan array + vand q11, q11, q15 + + + vmax.u16 q0, q10, q11 ; find maximum value in q0, q1 + vmax.u16 d0, d0, d1 + vmovl.u16 q0, d0 + + vmul.s16 q2, q12 ; x * Dequant + vmul.s16 q3, q13 + + vmax.u32 d0, d0, d1 + vpmax.u32 d0, d0, d0 + + vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant + + vst1.8 {d0[0]}, [r5] ; store eob + + ldmfd sp!, {r4-r7} + bx lr + + zero_output + strb r2, [r5] ; store eob + vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0 + vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0 + + ldmfd sp!, {r4-r7} + bx lr + + ENDP + + ; default inverse zigzag table is defined in vp8/common/entropy.c + ALIGN 16 ; enable use of @128 bit aligned loads + inv_zig_zag + DCW 0x0001, 0x0002, 0x0006, 0x0007 + DCW 0x0003, 0x0005, 0x0008, 0x000d + DCW 0x0004, 0x0009, 0x000c, 0x000e + DCW 0x000a, 0x000b, 0x000f, 0x0010 + + END + diff --cc vp8/encoder/arm/neon/subtract_neon.asm index 000000000,91a328c29..5bda78678 mode 000000,100644..100644 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ b/vp8/encoder/arm/neon/subtract_neon.asm @@@ -1,0 -1,199 +1,199 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license + ; that can be found in the LICENSE file in the root of the source + ; tree. An additional intellectual property rights grant can be found + ; in the file PATENTS. All contributing project authors may + ; be found in the AUTHORS file in the root of the source tree. 
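Rather than scanning for the end of block, the NEON quantizers in fastquantizeb_neon.asm above mask the inverse zig-zag table (1-based scan positions) by which quantized coefficients are nonzero, then take the maximum. A self-contained scalar sketch of that trick:

    static int eob_from_inv_zigzag_sketch(const short *qcoeff)
    {
        /* inv_zig_zag[rc] = 1-based scan position of raster index rc,
           same values as the DCW table above */
        static const unsigned short inv_zig_zag[16] = {
            1, 2, 6, 7, 3, 5, 8, 13, 4, 9, 12, 14, 10, 11, 15, 16
        };
        unsigned int eob = 0;
        int rc;

        for (rc = 0; rc < 16; rc++) {
            /* vtst builds an all-ones mask per nonzero lane, vand keeps
               that lane's scan position, the vmax ladder folds to one value */
            const unsigned int pos = qcoeff[rc] ? inv_zig_zag[rc] : 0;
            if (pos > eob)
                eob = pos;
        }
        return (int)eob;
    }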
+ ; + + EXPORT |vp8_subtract_b_neon| + EXPORT |vp8_subtract_mby_neon| + EXPORT |vp8_subtract_mbuv_neon| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp8_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + + ;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch) + |vp8_subtract_b_neon| PROC + + stmfd sp!, {r4-r7} + + ldr r3, [r0, #vp8_block_base_src] + ldr r4, [r0, #vp8_block_src] + ldr r5, [r0, #vp8_block_src_diff] + ldr r3, [r3] + ldr r6, [r0, #vp8_block_src_stride] + add r3, r3, r4 ; src = *base_src + src + ldr r7, [r1, #vp8_blockd_predictor] + + vld1.8 {d0}, [r3], r6 ;load src + vld1.8 {d1}, [r7], r2 ;load pred + vld1.8 {d2}, [r3], r6 + vld1.8 {d3}, [r7], r2 + vld1.8 {d4}, [r3], r6 + vld1.8 {d5}, [r7], r2 + vld1.8 {d6}, [r3], r6 + vld1.8 {d7}, [r7], r2 + + vsubl.u8 q10, d0, d1 + vsubl.u8 q11, d2, d3 + vsubl.u8 q12, d4, d5 + vsubl.u8 q13, d6, d7 + + mov r2, r2, lsl #1 + + vst1.16 {d20}, [r5], r2 ;store diff + vst1.16 {d22}, [r5], r2 + vst1.16 {d24}, [r5], r2 + vst1.16 {d26}, [r5], r2 + + ldmfd sp!, {r4-r7} + bx lr + + ENDP + + + ;========================================== + ;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride + ; unsigned char *pred, int pred_stride) + |vp8_subtract_mby_neon| PROC + push {r4-r7} + mov r12, #4 + ldr r4, [sp, #16] ; pred_stride + mov r6, #32 ; "diff" stride x2 + add r5, r0, #16 ; second diff pointer + + subtract_mby_loop + vld1.8 {q0}, [r1], r2 ;load src + vld1.8 {q1}, [r3], r4 ;load pred + vld1.8 {q2}, [r1], r2 + vld1.8 {q3}, [r3], r4 + vld1.8 {q4}, [r1], r2 + vld1.8 {q5}, [r3], r4 + vld1.8 {q6}, [r1], r2 + vld1.8 {q7}, [r3], r4 + + vsubl.u8 q8, d0, d2 + vsubl.u8 q9, d1, d3 + vsubl.u8 q10, d4, d6 + vsubl.u8 q11, d5, d7 + vsubl.u8 q12, d8, d10 + vsubl.u8 q13, d9, d11 + vsubl.u8 q14, d12, d14 + vsubl.u8 q15, d13, d15 + + vst1.16 {q8}, [r0], r6 ;store diff + vst1.16 {q9}, [r5], r6 + vst1.16 {q10}, [r0], r6 + vst1.16 {q11}, [r5], r6 + vst1.16 {q12}, [r0], r6 + vst1.16 {q13}, [r5], r6 + vst1.16 {q14}, [r0], r6 + vst1.16 {q15}, [r5], r6 + + subs r12, r12, #1 + bne subtract_mby_loop + + pop {r4-r7} + bx lr + ENDP + + ;================================= + ;void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, + ; int src_stride, unsigned char *upred, + ; unsigned char *vpred, int pred_stride) + + |vp8_subtract_mbuv_neon| PROC + push {r4-r7} + ldr r4, [sp, #16] ; upred + ldr r5, [sp, #20] ; vpred + ldr r6, [sp, #24] ; pred_stride + add r0, r0, #512 ; short *udiff = diff + 256; + mov r12, #32 ; "diff" stride x2 + add r7, r0, #16 ; second diff pointer + + ;u + vld1.8 {d0}, [r1], r3 ;load usrc + vld1.8 {d1}, [r4], r6 ;load upred + vld1.8 {d2}, [r1], r3 + vld1.8 {d3}, [r4], r6 + vld1.8 {d4}, [r1], r3 + vld1.8 {d5}, [r4], r6 + vld1.8 {d6}, [r1], r3 + vld1.8 {d7}, [r4], r6 + vld1.8 {d8}, [r1], r3 + vld1.8 {d9}, [r4], r6 + vld1.8 {d10}, [r1], r3 + vld1.8 {d11}, [r4], r6 + vld1.8 {d12}, [r1], r3 + vld1.8 {d13}, [r4], r6 + vld1.8 {d14}, [r1], r3 + vld1.8 {d15}, [r4], r6 + + vsubl.u8 q8, d0, d1 + vsubl.u8 q9, d2, d3 + vsubl.u8 q10, d4, d5 + vsubl.u8 q11, d6, d7 + vsubl.u8 q12, d8, d9 + vsubl.u8 q13, d10, d11 + vsubl.u8 q14, d12, d13 + vsubl.u8 q15, d14, d15 + + vst1.16 {q8}, [r0], r12 ;store diff + vst1.16 {q9}, [r7], r12 + vst1.16 {q10}, [r0], r12 + vst1.16 {q11}, [r7], r12 + vst1.16 {q12}, [r0], r12 + vst1.16 {q13}, [r7], r12 + vst1.16 {q14}, [r0], r12 + vst1.16 {q15}, [r7], r12 + + ;v + vld1.8 {d0}, [r2], r3 ;load vsrc + vld1.8 {d1}, [r5], r6 ;load vpred + vld1.8 {d2}, [r2], 
r3 + vld1.8 {d3}, [r5], r6 + vld1.8 {d4}, [r2], r3 + vld1.8 {d5}, [r5], r6 + vld1.8 {d6}, [r2], r3 + vld1.8 {d7}, [r5], r6 + vld1.8 {d8}, [r2], r3 + vld1.8 {d9}, [r5], r6 + vld1.8 {d10}, [r2], r3 + vld1.8 {d11}, [r5], r6 + vld1.8 {d12}, [r2], r3 + vld1.8 {d13}, [r5], r6 + vld1.8 {d14}, [r2], r3 + vld1.8 {d15}, [r5], r6 + + vsubl.u8 q8, d0, d1 + vsubl.u8 q9, d2, d3 + vsubl.u8 q10, d4, d5 + vsubl.u8 q11, d6, d7 + vsubl.u8 q12, d8, d9 + vsubl.u8 q13, d10, d11 + vsubl.u8 q14, d12, d13 + vsubl.u8 q15, d14, d15 + + vst1.16 {q8}, [r0], r12 ;store diff + vst1.16 {q9}, [r7], r12 + vst1.16 {q10}, [r0], r12 + vst1.16 {q11}, [r7], r12 + vst1.16 {q12}, [r0], r12 + vst1.16 {q13}, [r7], r12 + vst1.16 {q14}, [r0], r12 + vst1.16 {q15}, [r7], r12 + + pop {r4-r7} + bx lr + + ENDP + + END diff --cc vp8/encoder/x86/quantize_sse2.asm index 000000000,724e54c45..fe9464b3d mode 000000,100644..100644 --- a/vp8/encoder/x86/quantize_sse2.asm +++ b/vp8/encoder/x86/quantize_sse2.asm @@@ -1,0 -1,386 +1,386 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license and patent + ; grant that can be found in the LICENSE file in the root of the source + ; tree. All contributing project authors may be found in the AUTHORS + ; file in the root of the source tree. + ; + + + %include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" ++%include "vp8_asm_enc_offsets.asm" + + + ; void vp8_regular_quantize_b_sse2 | arg + ; (BLOCK *b, | 0 + ; BLOCKD *d) | 1 + + global sym(vp8_regular_quantize_b_sse2) PRIVATE + sym(vp8_regular_quantize_b_sse2): + push rbp + mov rbp, rsp + SAVE_XMM 7 + GET_GOT rbx + + %if ABI_IS_32BIT + push rdi + push rsi + %else + %ifidn __OUTPUT_FORMAT__,x64 + push rdi + push rsi + %endif + %endif + + ALIGN_STACK 16, rax + %define zrun_zbin_boost 0 ; 8 + %define abs_minus_zbin 8 ; 32 + %define temp_qcoeff 40 ; 32 + %define qcoeff 72 ; 32 + %define stack_size 104 + sub rsp, stack_size + ; end prolog + + %if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d + %else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif + %endif + + mov rdx, [rdi + vp8_block_coeff] ; coeff_ptr + mov rcx, [rdi + vp8_block_zbin] ; zbin_ptr + movd xmm7, [rdi + vp8_block_zbin_extra] ; zbin_oq_value + + ; z + movdqa xmm0, [rdx] + movdqa xmm4, [rdx + 16] + mov rdx, [rdi + vp8_block_round] ; round_ptr + + pshuflw xmm7, xmm7, 0 + punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + ; sz + psraw xmm0, 15 + psraw xmm4, 15 + + ; (z ^ sz) + pxor xmm1, xmm0 + pxor xmm5, xmm4 + + ; x = abs(z) + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa xmm2, [rcx] + movdqa xmm3, [rcx + 16] + mov rcx, [rdi + vp8_block_quant] ; quant_ptr + + ; *zbin_ptr + zbin_oq_value + paddw xmm2, xmm7 + paddw xmm3, xmm7 + + ; x - (*zbin_ptr + zbin_oq_value) + psubw xmm1, xmm2 + psubw xmm5, xmm3 + movdqa [rsp + abs_minus_zbin], xmm1 + movdqa [rsp + abs_minus_zbin + 16], xmm5 + + ; add (zbin_ptr + zbin_oq_value) back + paddw xmm1, xmm2 + paddw xmm5, xmm3 + + movdqa xmm2, [rdx] + movdqa xmm6, [rdx + 16] + + movdqa xmm3, [rcx] + movdqa xmm7, [rcx + 16] + + ; x + round + paddw xmm1, xmm2 + paddw xmm5, xmm6 + + ; y = x * quant_ptr >> 16 + pmulhw xmm3, xmm1 + pmulhw xmm7, xmm5 + + ; y += x + paddw xmm1, xmm3 + paddw xmm5, xmm7 + + movdqa [rsp + temp_qcoeff], xmm1 + movdqa [rsp + temp_qcoeff + 16], xmm5 + + pxor xmm6, xmm6 + 
; zero qcoeff + movdqa [rsp + qcoeff], xmm6 + movdqa [rsp + qcoeff + 16], xmm6 + + mov rdx, [rdi + vp8_block_zrun_zbin_boost] ; zbin_boost_ptr + mov rax, [rdi + vp8_block_quant_shift] ; quant_shift_ptr + mov [rsp + zrun_zbin_boost], rdx + + %macro ZIGZAG_LOOP 1 + ; x + movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2] + + ; if (x >= zbin) + sub cx, WORD PTR[rdx] ; x - zbin + lea rdx, [rdx + 2] ; zbin_boost_ptr++ + jl .rq_zigzag_loop_%1 ; x < zbin + + movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2] + + ; downshift by quant_shift[rc] + movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc] + sar edi, cl ; also sets Z bit + je .rq_zigzag_loop_%1 ; !y + mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] + mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost + .rq_zigzag_loop_%1: + %endmacro + ; in vp8_default_zig_zag1d order: see vp8/common/entropy.c + ZIGZAG_LOOP 0 + ZIGZAG_LOOP 1 + ZIGZAG_LOOP 4 + ZIGZAG_LOOP 8 + ZIGZAG_LOOP 5 + ZIGZAG_LOOP 2 + ZIGZAG_LOOP 3 + ZIGZAG_LOOP 6 + ZIGZAG_LOOP 9 + ZIGZAG_LOOP 12 + ZIGZAG_LOOP 13 + ZIGZAG_LOOP 10 + ZIGZAG_LOOP 7 + ZIGZAG_LOOP 11 + ZIGZAG_LOOP 14 + ZIGZAG_LOOP 15 + + movdqa xmm2, [rsp + qcoeff] + movdqa xmm3, [rsp + qcoeff + 16] + + mov rcx, [rsi + vp8_blockd_dequant] ; dequant_ptr + mov rdi, [rsi + vp8_blockd_dqcoeff] ; dqcoeff_ptr + + ; y ^ sz + pxor xmm2, xmm0 + pxor xmm3, xmm4 + ; x = (y ^ sz) - sz + psubw xmm2, xmm0 + psubw xmm3, xmm4 + + ; dequant + movdqa xmm0, [rcx] + movdqa xmm1, [rcx + 16] + + mov rcx, [rsi + vp8_blockd_qcoeff] ; qcoeff_ptr + + pmullw xmm0, xmm2 + pmullw xmm1, xmm3 + + movdqa [rcx], xmm2 ; store qcoeff + movdqa [rcx + 16], xmm3 + movdqa [rdi], xmm0 ; store dqcoeff + movdqa [rdi + 16], xmm1 + + mov rcx, [rsi + vp8_blockd_eob] + + ; select the last value (in zig_zag order) for EOB + pcmpeqw xmm2, xmm6 + pcmpeqw xmm3, xmm6 + ; ! 
+ pcmpeqw xmm6, xmm6 + pxor xmm2, xmm6 + pxor xmm3, xmm6 + ; mask inv_zig_zag + pand xmm2, [GLOBAL(inv_zig_zag)] + pand xmm3, [GLOBAL(inv_zig_zag + 16)] + ; select the max value + pmaxsw xmm2, xmm3 + pshufd xmm3, xmm2, 00001110b + pmaxsw xmm2, xmm3 + pshuflw xmm3, xmm2, 00001110b + pmaxsw xmm2, xmm3 + pshuflw xmm3, xmm2, 00000001b + pmaxsw xmm2, xmm3 + movd eax, xmm2 + and eax, 0xff + + mov BYTE PTR [rcx], al ; store eob + + ; begin epilog + add rsp, stack_size + pop rsp + %if ABI_IS_32BIT + pop rsi + pop rdi + %else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + %endif + %endif + RESTORE_GOT + RESTORE_XMM + pop rbp + ret + + ; void vp8_fast_quantize_b_sse2 | arg + ; (BLOCK *b, | 0 + ; BLOCKD *d) | 1 + + global sym(vp8_fast_quantize_b_sse2) PRIVATE + sym(vp8_fast_quantize_b_sse2): + push rbp + mov rbp, rsp + GET_GOT rbx + + %if ABI_IS_32BIT + push rdi + push rsi + %else + %ifidn __OUTPUT_FORMAT__,x64 + push rdi + push rsi + %else + ; these registers are used for passing arguments + %endif + %endif + + ; end prolog + + %if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d + %else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif + %endif + + mov rax, [rdi + vp8_block_coeff] + mov rcx, [rdi + vp8_block_round] + mov rdx, [rdi + vp8_block_quant_fast] + + ; z = coeff + movdqa xmm0, [rax] + movdqa xmm4, [rax + 16] + + ; dup z so we can save sz + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + ; sz = z >> 15 + psraw xmm0, 15 + psraw xmm4, 15 + + ; x = abs(z) = (z ^ sz) - sz + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + ; x += round + paddw xmm1, [rcx] + paddw xmm5, [rcx + 16] + + mov rax, [rsi + vp8_blockd_qcoeff] + mov rcx, [rsi + vp8_blockd_dequant] + mov rdi, [rsi + vp8_blockd_dqcoeff] + + ; y = x * quant >> 16 + pmulhw xmm1, [rdx] + pmulhw xmm5, [rdx + 16] + + ; x = (y ^ sz) - sz + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + ; qcoeff = x + movdqa [rax], xmm1 + movdqa [rax + 16], xmm5 + + ; x * dequant + movdqa xmm2, xmm1 + movdqa xmm3, xmm5 + pmullw xmm2, [rcx] + pmullw xmm3, [rcx + 16] + + ; dqcoeff = x * dequant + movdqa [rdi], xmm2 + movdqa [rdi + 16], xmm3 + + pxor xmm4, xmm4 ;clear all bits + pcmpeqw xmm1, xmm4 + pcmpeqw xmm5, xmm4 + + pcmpeqw xmm4, xmm4 ;set all bits + pxor xmm1, xmm4 + pxor xmm5, xmm4 + + pand xmm1, [GLOBAL(inv_zig_zag)] + pand xmm5, [GLOBAL(inv_zig_zag + 16)] + + pmaxsw xmm1, xmm5 + + mov rcx, [rsi + vp8_blockd_eob] + + ; now down to 8 + pshufd xmm5, xmm1, 00001110b + + pmaxsw xmm1, xmm5 + + ; only 4 left + pshuflw xmm5, xmm1, 00001110b + + pmaxsw xmm1, xmm5 + + ; okay, just 2! + pshuflw xmm5, xmm1, 00000001b + + pmaxsw xmm1, xmm5 + + movd eax, xmm1 + and eax, 0xff + + mov BYTE PTR [rcx], al ; store eob + + ; begin epilog + %if ABI_IS_32BIT + pop rsi + pop rdi + %else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + %endif + %endif + + RESTORE_GOT + pop rbp + ret + + SECTION_RODATA + align 16 + inv_zig_zag: + dw 0x0001, 0x0002, 0x0006, 0x0007 + dw 0x0003, 0x0005, 0x0008, 0x000d + dw 0x0004, 0x0009, 0x000c, 0x000e + dw 0x000a, 0x000b, 0x000f, 0x0010 diff --cc vp8/encoder/x86/quantize_sse4.asm index 000000000,f0e5d407e..f21146457 mode 000000,100644..100644 --- a/vp8/encoder/x86/quantize_sse4.asm +++ b/vp8/encoder/x86/quantize_sse4.asm @@@ -1,0 -1,256 +1,256 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
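For reference, the regular (dead-zone) path that vp8_regular_quantize_b_sse2 above implements looks roughly like this in scalar terms; this is a pseudo-C sketch following the asm's own comments (cf. encoder/quantize.c), and ZIGZAG_LOOP unrolls the body sixteen times in vp8_default_zig_zag1d order:

    for (i = 0; i < 16; i++) {
        rc = vp8_default_zig_zag1d[i];
        sz = z[rc] >> 15;
        x  = (z[rc] ^ sz) - sz;                     /* abs(z)           */
        if (x - (zbin[rc] + zbin_extra) < *boost++)
            continue;                               /* inside dead zone */
        y  = ((((x + round[rc]) * quant[rc]) >> 16) + x + round[rc])
                 >> quant_shift[rc];
        if (!y)
            continue;                               /* quantized to 0   */
        qcoeff[rc]  = (y ^ sz) - sz;
        dqcoeff[rc] = qcoeff[rc] * dequant[rc];
        eob   = i + 1;                   /* last nonzero, zig-zag order */
        boost = b->zrun_zbin_boost;      /* zero run ended: reset boost */
    }

The fast path (vp8_fast_quantize_b_sse2) drops the zbin test and the boost bookkeeping and quantizes every coefficient unconditionally, which is why it vectorizes without a scalar zig-zag loop.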
+ ; + ; Use of this source code is governed by a BSD-style license and patent + ; grant that can be found in the LICENSE file in the root of the source + ; tree. All contributing project authors may be found in the AUTHORS + ; file in the root of the source tree. + ; + + + %include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" ++%include "vp8_asm_enc_offsets.asm" + + + ; void vp8_regular_quantize_b_sse4 | arg + ; (BLOCK *b, | 0 + ; BLOCKD *d) | 1 + + global sym(vp8_regular_quantize_b_sse4) PRIVATE + sym(vp8_regular_quantize_b_sse4): + + %if ABI_IS_32BIT + push rbp + mov rbp, rsp + GET_GOT rbx + push rdi + push rsi + + ALIGN_STACK 16, rax + %define qcoeff 0 ; 32 + %define stack_size 32 + sub rsp, stack_size + %else + %ifidn __OUTPUT_FORMAT__,x64 + SAVE_XMM 8, u + push rdi + push rsi + %endif + %endif + ; end prolog + + %if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d + %else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif + %endif + + mov rax, [rdi + vp8_block_coeff] + mov rcx, [rdi + vp8_block_zbin] + mov rdx, [rdi + vp8_block_round] + movd xmm7, [rdi + vp8_block_zbin_extra] + + ; z + movdqa xmm0, [rax] + movdqa xmm1, [rax + 16] + + ; duplicate zbin_oq_value + pshuflw xmm7, xmm7, 0 + punpcklwd xmm7, xmm7 + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + + ; sz + psraw xmm0, 15 + psraw xmm1, 15 + + ; (z ^ sz) + pxor xmm2, xmm0 + pxor xmm3, xmm1 + + ; x = abs(z) + psubw xmm2, xmm0 + psubw xmm3, xmm1 + + ; zbin + movdqa xmm4, [rcx] + movdqa xmm5, [rcx + 16] + + ; *zbin_ptr + zbin_oq_value + paddw xmm4, xmm7 + paddw xmm5, xmm7 + + movdqa xmm6, xmm2 + movdqa xmm7, xmm3 + + ; x - (*zbin_ptr + zbin_oq_value) + psubw xmm6, xmm4 + psubw xmm7, xmm5 + + ; round + movdqa xmm4, [rdx] + movdqa xmm5, [rdx + 16] + + mov rax, [rdi + vp8_block_quant_shift] + mov rcx, [rdi + vp8_block_quant] + mov rdx, [rdi + vp8_block_zrun_zbin_boost] + + ; x + round + paddw xmm2, xmm4 + paddw xmm3, xmm5 + + ; quant + movdqa xmm4, [rcx] + movdqa xmm5, [rcx + 16] + + ; y = x * quant_ptr >> 16 + pmulhw xmm4, xmm2 + pmulhw xmm5, xmm3 + + ; y += x + paddw xmm2, xmm4 + paddw xmm3, xmm5 + + pxor xmm4, xmm4 + %if ABI_IS_32BIT + movdqa [rsp + qcoeff], xmm4 + movdqa [rsp + qcoeff + 16], xmm4 + %else + pxor xmm8, xmm8 + %endif + + ; quant_shift + movdqa xmm5, [rax] + + ; zrun_zbin_boost + mov rax, rdx + + %macro ZIGZAG_LOOP 5 + ; x + pextrw ecx, %4, %2 + + ; if (x >= zbin) + sub cx, WORD PTR[rdx] ; x - zbin + lea rdx, [rdx + 2] ; zbin_boost_ptr++ + jl .rq_zigzag_loop_%1 ; x < zbin + + pextrw edi, %3, %2 ; y + + ; downshift by quant_shift[rc] + pextrb ecx, xmm5, %1 ; quant_shift[rc] + sar edi, cl ; also sets Z bit + je .rq_zigzag_loop_%1 ; !y + %if ABI_IS_32BIT + mov WORD PTR[rsp + qcoeff + %1 *2], di + %else + pinsrw %5, edi, %2 ; qcoeff[rc] + %endif + mov rdx, rax ; reset to b->zrun_zbin_boost + .rq_zigzag_loop_%1: + %endmacro + ; in vp8_default_zig_zag1d order: see vp8/common/entropy.c + ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8 + ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8 + ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8 + ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8 + ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8 + ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4 + ZIGZAG_LOOP 11, 3, xmm3, 
xmm7, xmm8 + ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8 + ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 + + mov rcx, [rsi + vp8_blockd_dequant] + mov rdi, [rsi + vp8_blockd_dqcoeff] + + %if ABI_IS_32BIT + movdqa xmm4, [rsp + qcoeff] + movdqa xmm5, [rsp + qcoeff + 16] + %else + %define xmm5 xmm8 + %endif + + ; y ^ sz + pxor xmm4, xmm0 + pxor xmm5, xmm1 + ; x = (y ^ sz) - sz + psubw xmm4, xmm0 + psubw xmm5, xmm1 + + ; dequant + movdqa xmm0, [rcx] + movdqa xmm1, [rcx + 16] + + mov rcx, [rsi + vp8_blockd_qcoeff] + + pmullw xmm0, xmm4 + pmullw xmm1, xmm5 + + ; store qcoeff + movdqa [rcx], xmm4 + movdqa [rcx + 16], xmm5 + + ; store dqcoeff + movdqa [rdi], xmm0 + movdqa [rdi + 16], xmm1 + + mov rcx, [rsi + vp8_blockd_eob] + + ; select the last value (in zig_zag order) for EOB + pxor xmm6, xmm6 + pcmpeqw xmm4, xmm6 + pcmpeqw xmm5, xmm6 + + packsswb xmm4, xmm5 + pshufb xmm4, [GLOBAL(zig_zag1d)] + pmovmskb edx, xmm4 + xor rdi, rdi + mov eax, -1 + xor dx, ax + bsr eax, edx + sub edi, edx + sar edi, 31 + add eax, 1 + and eax, edi + + mov BYTE PTR [rcx], al ; store eob + + ; begin epilog + %if ABI_IS_32BIT + add rsp, stack_size + pop rsp + + pop rsi + pop rdi + RESTORE_GOT + pop rbp + %else + %undef xmm5 + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + RESTORE_XMM + %endif + %endif + + ret + + SECTION_RODATA + align 16 + ; vp8/common/entropy.c: vp8_default_zig_zag1d + zig_zag1d: + db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --cc vp8/encoder/x86/quantize_ssse3.asm index 000000000,dd526f4f1..35368894d mode 000000,100644..100644 --- a/vp8/encoder/x86/quantize_ssse3.asm +++ b/vp8/encoder/x86/quantize_ssse3.asm @@@ -1,0 -1,138 +1,138 @@@ + ; + ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ; + ; Use of this source code is governed by a BSD-style license and patent + ; grant that can be found in the LICENSE file in the root of the source + ; tree. All contributing project authors may be found in the AUTHORS + ; file in the root of the source tree. 
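The EOB selection at the end of vp8_regular_quantize_b_sse4 above trades the SSE2 version's pmaxsw reduction for a byte shuffle and a bit scan: pcmpeqw/packsswb build a per-coefficient zero mask, pshufb reorders it into zig-zag order, pmovmskb collapses it into a 16-bit mask, and the xor/bsr/sar/and tail is a branchless form of the ternary below. The SSSE3 fast quantizer that follows uses the same trick. Semantically (a sketch with illustrative names):

    /* eob = 1 + index of the last nonzero coefficient, in zig-zag order */
    static int eob_sketch(const short *qcoeff, const unsigned char *zig_zag) {
        unsigned mask = 0;
        int i;
        for (i = 0; i < 16; i++)
            if (qcoeff[zig_zag[i]])
                mask |= 1u << i;
        return mask ? 32 - __builtin_clz(mask) : 0;   /* bsr(mask) + 1 */
    }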
+ ; + + + %include "vpx_ports/x86_abi_support.asm" -%include "asm_enc_offsets.asm" ++%include "vp8_asm_enc_offsets.asm" + + + ; void vp8_fast_quantize_b_ssse3 | arg + ; (BLOCK *b, | 0 + ; BLOCKD *d) | 1 + ; + + global sym(vp8_fast_quantize_b_ssse3) PRIVATE + sym(vp8_fast_quantize_b_ssse3): + push rbp + mov rbp, rsp + GET_GOT rbx + + %if ABI_IS_32BIT + push rdi + push rsi + %else + %ifidn __OUTPUT_FORMAT__,x64 + push rdi + push rsi + %endif + %endif + ; end prolog + + %if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d + %else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif + %endif + + mov rax, [rdi + vp8_block_coeff] + mov rcx, [rdi + vp8_block_round] + mov rdx, [rdi + vp8_block_quant_fast] + + ; coeff + movdqa xmm0, [rax] + movdqa xmm4, [rax + 16] + + ; round + movdqa xmm2, [rcx] + movdqa xmm3, [rcx + 16] + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + ; sz = z >> 15 + psraw xmm0, 15 + psraw xmm4, 15 + + pabsw xmm1, xmm1 + pabsw xmm5, xmm5 + + paddw xmm1, xmm2 + paddw xmm5, xmm3 + + ; quant_fast + pmulhw xmm1, [rdx] + pmulhw xmm5, [rdx + 16] + + mov rax, [rsi + vp8_blockd_qcoeff] + mov rdi, [rsi + vp8_blockd_dequant] + mov rcx, [rsi + vp8_blockd_dqcoeff] + + movdqa xmm2, xmm1 ;store y for getting eob + movdqa xmm3, xmm5 + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa [rax], xmm1 + movdqa [rax + 16], xmm5 + + movdqa xmm0, [rdi] + movdqa xmm4, [rdi + 16] + + pmullw xmm0, xmm1 + pmullw xmm4, xmm5 + pxor xmm1, xmm1 + + pcmpgtw xmm2, xmm1 ;calculate eob + pcmpgtw xmm3, xmm1 + packsswb xmm2, xmm3 + pshufb xmm2, [GLOBAL(zz_shuf)] + + pmovmskb edx, xmm2 + + movdqa [rcx], xmm0 ;store dqcoeff + movdqa [rcx + 16], xmm4 ;store dqcoeff + mov rcx, [rsi + vp8_blockd_eob] + + bsr eax, edx ;count 0 + add eax, 1 + + cmp edx, 0 ;if all 0, eob=0 + cmove eax, edx + + mov BYTE PTR [rcx], al ;store eob + + ; begin epilog + %if ABI_IS_32BIT + pop rsi + pop rdi + %else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + %endif + %endif + + RESTORE_GOT + pop rbp + ret + + SECTION_RODATA + align 16 + zz_shuf: + db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --cc vp8/vp8_common.mk index 000000000,a328f46c2..d54c2330c mode 000000,100644..100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@@ -1,0 -1,193 +1,196 @@@ + ## + ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ## + ## Use of this source code is governed by a BSD-style license + ## that can be found in the LICENSE file in the root of the source + ## tree. An additional intellectual property rights grant can be found + ## in the file PATENTS. All contributing project authors may + ## be found in the AUTHORS file in the root of the source tree. 
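vp8_common.mk below gathers sources into make lists whose suffix is a configure result: a guard that expands to yes adds the file to the list the build consumes, while a guard that expands to no parks it in a list nothing reads. Two lines from these makefiles show the whole pattern:

    # $(HAVE_SSE2) expands to yes or no at configure time
    VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c

    # per-target lists later drop anything a config put on a REMOVE list
    VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))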
+ ## + + VP8_COMMON_SRCS-yes += vp8_common.mk + VP8_COMMON_SRCS-yes += common/pragmas.h + VP8_COMMON_SRCS-yes += common/ppflags.h + VP8_COMMON_SRCS-yes += common/onyx.h + VP8_COMMON_SRCS-yes += common/onyxd.h + VP8_COMMON_SRCS-yes += common/alloccommon.c + VP8_COMMON_SRCS-yes += common/asm_com_offsets.c + VP8_COMMON_SRCS-yes += common/blockd.c + VP8_COMMON_SRCS-yes += common/coefupdateprobs.h + VP8_COMMON_SRCS-yes += common/debugmodes.c + VP8_COMMON_SRCS-yes += common/default_coef_probs.h + VP8_COMMON_SRCS-yes += common/dequantize.c + VP8_COMMON_SRCS-yes += common/entropy.c + VP8_COMMON_SRCS-yes += common/entropymode.c + VP8_COMMON_SRCS-yes += common/entropymv.c + VP8_COMMON_SRCS-yes += common/extend.c + VP8_COMMON_SRCS-yes += common/filter.c + VP8_COMMON_SRCS-yes += common/filter.h + VP8_COMMON_SRCS-yes += common/findnearmv.c + VP8_COMMON_SRCS-yes += common/generic/systemdependent.c + VP8_COMMON_SRCS-yes += common/idct_blk.c + VP8_COMMON_SRCS-yes += common/idctllm.c + VP8_COMMON_SRCS-yes += common/alloccommon.h + VP8_COMMON_SRCS-yes += common/blockd.h + VP8_COMMON_SRCS-yes += common/common.h + VP8_COMMON_SRCS-yes += common/entropy.h + VP8_COMMON_SRCS-yes += common/entropymode.h + VP8_COMMON_SRCS-yes += common/entropymv.h + VP8_COMMON_SRCS-yes += common/extend.h + VP8_COMMON_SRCS-yes += common/findnearmv.h + VP8_COMMON_SRCS-yes += common/header.h + VP8_COMMON_SRCS-yes += common/invtrans.h + VP8_COMMON_SRCS-yes += common/loopfilter.h + VP8_COMMON_SRCS-yes += common/modecont.h + VP8_COMMON_SRCS-yes += common/mv.h + VP8_COMMON_SRCS-yes += common/onyxc_int.h + VP8_COMMON_SRCS-yes += common/quant_common.h + VP8_COMMON_SRCS-yes += common/reconinter.h + VP8_COMMON_SRCS-yes += common/reconintra4x4.h + VP8_COMMON_SRCS-yes += common/rtcd.c + VP8_COMMON_SRCS-yes += common/rtcd_defs.sh + VP8_COMMON_SRCS-yes += common/setupintrarecon.h + VP8_COMMON_SRCS-yes += common/swapyv12buffer.h + VP8_COMMON_SRCS-yes += common/systemdependent.h + VP8_COMMON_SRCS-yes += common/threading.h + VP8_COMMON_SRCS-yes += common/treecoder.h + VP8_COMMON_SRCS-yes += common/loopfilter.c + VP8_COMMON_SRCS-yes += common/loopfilter_filters.c + VP8_COMMON_SRCS-yes += common/mbpitch.c + VP8_COMMON_SRCS-yes += common/modecont.c + VP8_COMMON_SRCS-yes += common/quant_common.c + VP8_COMMON_SRCS-yes += common/reconinter.c + VP8_COMMON_SRCS-yes += common/reconintra.c + VP8_COMMON_SRCS-yes += common/reconintra4x4.c + VP8_COMMON_SRCS-yes += common/sad_c.c + VP8_COMMON_SRCS-yes += common/setupintrarecon.c + VP8_COMMON_SRCS-yes += common/swapyv12buffer.c + VP8_COMMON_SRCS-yes += common/variance_c.c + VP8_COMMON_SRCS-yes += common/variance.h + VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h + + + + VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c + VP8_COMMON_SRCS-yes += common/treecoder.c + + VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c + VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h + VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c + VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c + VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/mfqe.c + VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h + VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += 
common/x86/loopfilter_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/sad_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_mmx.c + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_impl_mmx.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/sad_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_sse2.c + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_impl_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/sad_sse3.asm + VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/sad_ssse3.asm + VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm + VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_ssse3.c + VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/variance_impl_ssse3.asm + VP8_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/sad_sse4.asm + + ifeq ($(CONFIG_POSTPROC),yes) + VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.c + VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm + endif + + ifeq ($(ARCH_X86_64),yes) + VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2.asm + endif + + # common (c) + VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c + VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c + VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/loopfilter_filters_dspr2.c + VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c + VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c + VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c + + # common (c) + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/dequantize_arm.c + VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/variance_arm.c + + # common (media) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/bilinearfilter_arm.c + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/bilinearfilter_arm.h + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/bilinearfilter_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/copymem8x4_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/copymem8x8_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/copymem16x16_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dc_only_idct_add_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/iwalsh_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/filter_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/loopfilter_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/simpleloopfilter_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/sixtappredict8x4_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/intra4x4_predict_v6$(ASM) + 
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_sad16x16_armv6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance8x8_armv6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance16x16_armv6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6$(ASM) + VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM) + + # common (neon) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict4x4_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict8x4_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict8x8_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict16x16_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x4_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem8x8_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem16x16_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad8_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sad16_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict4x4_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x4_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict8x8_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM) + VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM) ++ ++$(eval $(call asm_offsets_template,\ ++ vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/asm_com_offsets.c)) diff --cc vp8/vp8cx.mk index 000000000,5976297bc..4ff3ef2c0 mode 000000,100644..100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@@ -1,0 -1,124 +1,125 @@@ + ## + ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
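The asm_offsets_template call just added generates vp8_asm_com_offsets.asm from common/asm_com_offsets.c at build time, and vp8cx.mk and vp8dx.mk below add the matching vp8_asm_enc_offsets.asm and vp8_asm_dec_offsets.asm rules. Giving the generated files per-codec names is what the INCLUDE changes in the assembly sources above are for: with vp8_ and vp9_ prefixes, both codecs' offset files can coexist in one build tree. The offsets sources follow the usual extract-struct-offsets-from-compiled-C idiom, roughly (a sketch with toy stand-in types; the real files define the constants against the actual BLOCK/BLOCKD headers and add compile-time asserts):

    #include <stddef.h>

    /* toy stand-ins for the real encoder structs */
    typedef struct { short *coeff; } BLOCK;
    typedef struct { short *qcoeff; } BLOCKD;

    #define DEFINE(sym, val) const int sym = val
    DEFINE(vp8_block_coeff,   offsetof(BLOCK,  coeff));
    DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
    /* the build compiles this to assembly and scrapes the constants
       into vp8_asm_enc_offsets.asm for the .asm files to INCLUDE */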
+ ## + ## Use of this source code is governed by a BSD-style license + ## that can be found in the LICENSE file in the root of the source + ## tree. An additional intellectual property rights grant can be found + ## in the file PATENTS. All contributing project authors may + ## be found in the AUTHORS file in the root of the source tree. + ## + + -include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk - + VP8_CX_EXPORTS += exports_enc + + VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes) + VP8_CX_SRCS-no += $(VP8_COMMON_SRCS-no) + VP8_CX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) + VP8_CX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) + + ifeq ($(ARCH_ARM),yes) + include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk + endif + + VP8_CX_SRCS-yes += vp8_cx_iface.c + + # encoder + #INCLUDES += algo/vpx_common/vpx_mem/include + #INCLUDES += common + #INCLUDES += common + #INCLUDES += common + #INCLUDES += algo/vpx_ref/cpu_id/include + #INCLUDES += common + #INCLUDES += encoder + + VP8_CX_SRCS-yes += encoder/asm_enc_offsets.c + VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h + VP8_CX_SRCS-yes += encoder/bitstream.c + VP8_CX_SRCS-yes += encoder/boolhuff.c + VP8_CX_SRCS-yes += encoder/dct.c + VP8_CX_SRCS-yes += encoder/encodeframe.c + VP8_CX_SRCS-yes += encoder/encodeframe.h + VP8_CX_SRCS-yes += encoder/encodeintra.c + VP8_CX_SRCS-yes += encoder/encodemb.c + VP8_CX_SRCS-yes += encoder/encodemv.c + VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.c + VP8_CX_SRCS-yes += encoder/firstpass.c + VP8_CX_SRCS-yes += encoder/block.h + VP8_CX_SRCS-yes += encoder/boolhuff.h + VP8_CX_SRCS-yes += encoder/bitstream.h + VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.h + VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.c + VP8_CX_SRCS-yes += encoder/encodeintra.h + VP8_CX_SRCS-yes += encoder/encodemb.h + VP8_CX_SRCS-yes += encoder/encodemv.h + VP8_CX_SRCS-yes += encoder/firstpass.h + VP8_CX_SRCS-yes += encoder/lookahead.c + VP8_CX_SRCS-yes += encoder/lookahead.h + VP8_CX_SRCS-yes += encoder/mcomp.h + VP8_CX_SRCS-yes += encoder/modecosts.h + VP8_CX_SRCS-yes += encoder/onyx_int.h + VP8_CX_SRCS-yes += encoder/pickinter.h + VP8_CX_SRCS-yes += encoder/psnr.h + VP8_CX_SRCS-yes += encoder/quantize.h + VP8_CX_SRCS-yes += encoder/ratectrl.h + VP8_CX_SRCS-yes += encoder/rdopt.h + VP8_CX_SRCS-yes += encoder/tokenize.h + VP8_CX_SRCS-yes += encoder/treewriter.h + VP8_CX_SRCS-yes += encoder/mcomp.c + VP8_CX_SRCS-yes += encoder/modecosts.c + VP8_CX_SRCS-yes += encoder/onyx_if.c + VP8_CX_SRCS-yes += encoder/pickinter.c + VP8_CX_SRCS-yes += encoder/picklpf.c + VP8_CX_SRCS-yes += encoder/psnr.c + VP8_CX_SRCS-yes += encoder/quantize.c + VP8_CX_SRCS-yes += encoder/ratectrl.c + VP8_CX_SRCS-yes += encoder/rdopt.c + VP8_CX_SRCS-yes += encoder/segmentation.c + VP8_CX_SRCS-yes += encoder/segmentation.h + VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/ssim.c + VP8_CX_SRCS-yes += encoder/tokenize.c + VP8_CX_SRCS-yes += encoder/dct_value_cost.h + VP8_CX_SRCS-yes += encoder/dct_value_tokens.h + VP8_CX_SRCS-yes += encoder/treewriter.c + VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h + VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c + VP8_CX_SRCS-yes += encoder/temporal_filter.c + VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c + VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h + + ifeq ($(CONFIG_REALTIME_ONLY),yes) + VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c + VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c + endif + + VP8_CX_SRCS-$(HAVE_MMX) += 
encoder/x86/dct_mmx.asm + VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm + VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm + + ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c + ifeq ($(HAVE_SSE2),yes) + vp8/encoder/x86/denoising_sse2.c.o: CFLAGS += -msse2 + endif + endif + + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm + VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c + VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm + VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm + VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm + VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm + VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm + + ifeq ($(CONFIG_REALTIME_ONLY),yes) + VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm + endif + + + VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes)) ++ ++$(eval $(call asm_offsets_template,\ ++ vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/asm_enc_offsets.c)) diff --cc vp8/vp8dx.mk index 000000000,2cfd280cb..5753e04e5 mode 000000,100644..100644 --- a/vp8/vp8dx.mk +++ b/vp8/vp8dx.mk @@@ -1,0 -1,64 +1,65 @@@ + ## + ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. + ## + ## Use of this source code is governed by a BSD-style license + ## that can be found in the LICENSE file in the root of the source + ## tree. An additional intellectual property rights grant can be found + ## in the file PATENTS. All contributing project authors may + ## be found in the AUTHORS file in the root of the source tree. 
+ ## + + -include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk - + VP8_DX_EXPORTS += exports_dec + + VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes) + VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no) + VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) + VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) + + VP8_DX_SRCS-yes += vp8_dx_iface.c + + # common + #define ARM + #define DISABLE_THREAD + + #INCLUDES += algo/vpx_common/vpx_mem/include + #INCLUDES += common + #INCLUDES += common + #INCLUDES += common + #INCLUDES += common + #INCLUDES += decoder + + + + # decoder + #define ARM + #define DISABLE_THREAD + + #INCLUDES += algo/vpx_common/vpx_mem/include + #INCLUDES += common + #INCLUDES += common + #INCLUDES += common + #INCLUDES += common + #INCLUDES += decoder + + VP8_DX_SRCS-yes += decoder/asm_dec_offsets.c + VP8_DX_SRCS-yes += decoder/dboolhuff.c + VP8_DX_SRCS-yes += decoder/decodemv.c + VP8_DX_SRCS-yes += decoder/decodframe.c + VP8_DX_SRCS-yes += decoder/detokenize.c + VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h + VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h + VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.c + VP8_DX_SRCS-yes += decoder/dboolhuff.h + VP8_DX_SRCS-yes += decoder/decodemv.h + VP8_DX_SRCS-yes += decoder/decoderthreading.h + VP8_DX_SRCS-yes += decoder/detokenize.h + VP8_DX_SRCS-yes += decoder/onyxd_int.h + VP8_DX_SRCS-yes += decoder/treereader.h + VP8_DX_SRCS-yes += decoder/onyxd_if.c + VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c + + VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes)) ++ ++$(eval $(call asm_offsets_template,\ ++ vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/asm_dec_offsets.c)) diff --cc vp9/common/rtcd_defs.sh index f909c1898,000000000..fc9195427 mode 100644,000000..100644 --- a/vp9/common/rtcd_defs.sh +++ b/vp9/common/rtcd_defs.sh @@@ -1,518 -1,0 +1,518 @@@ +common_forward_decls() { +cat <> 8) + + addne r2, r2, r4 ; if (bit) lowvalue += split + subne r4, r5, r4 ; if (bit) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. 
This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start +token_zero_while_loop + mov r9, #0 + strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r1, [r7, r4] + cmpge r1, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r9, [r7, r4] ; w->buffer[x] + add r9, r9, #1 + strb r9, [r7, r4] ; w->buffer[x] + 1 +token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r9, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r1, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r1, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r9, r4] ; w->buffer[w->pos++] + +token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + str r2, [r0, #vp9_writer_lowvalue] + str r5, [r0, #vp9_writer_range] + str r3, [r0, #vp9_writer_count] + pop {r4-r9, pc} + ENDP + +; r0 BOOL_CODER *br +|vp8_stop_encode| PROC + push {r4-r10, lr} + + ldr r2, [r0, #vp9_writer_lowvalue] + ldr r5, [r0, #vp9_writer_range] + ldr r3, [r0, #vp9_writer_count] + + mov r10, #32 + +stop_encode_loop + sub r7, r5, #1 ; range-1 + + mov r4, r7, lsl #7 ; ((range-1) * 128) + + mov r7, #1 + add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. 
This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero_se ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set_se + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start_se +token_zero_while_loop_se + mov r9, #0 + strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +token_zero_while_start_se + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r1, [r7, r4] + cmpge r1, #0xff + beq token_zero_while_loop_se + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r9, [r7, r4] ; w->buffer[x] + add r9, r9, #1 + strb r9, [r7, r4] ; w->buffer[x] + 1 +token_high_bit_not_set_se + rsb r4, r6, #24 ; 24-offset + ldr r9, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r1, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r1, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r9, r4] ; w->buffer[w->pos++] + +token_count_lt_zero_se + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r10, r10, #1 + bne stop_encode_loop + + str r2, [r0, #vp9_writer_lowvalue] + str r5, [r0, #vp9_writer_range] + str r3, [r0, #vp9_writer_count] + pop {r4-r10, pc} + + ENDP + +; r0 BOOL_CODER *br +; r1 int data +; r2 int bits +|vp8_encode_value| PROC + push {r4-r11, lr} + + mov r10, r2 + + ldr r2, [r0, #vp9_writer_lowvalue] + ldr r5, [r0, #vp9_writer_range] + ldr r3, [r0, #vp9_writer_count] + + rsb r4, r10, #32 ; 32-n + + ; v is kept in r1 during the token pack loop + lsl r1, r1, r4 ; r1 = v << 32 - n + +encode_value_loop + sub r7, r5, #1 ; range-1 + + ; Decisions are made based on the bit value shifted + ; off of v, so set a flag here based on this. + ; This value is refered to as "bb" + lsls r1, r1, #1 ; bit = v >> n + mov r4, r7, lsl #7 ; ((range-1) * 128) + + mov r7, #1 + add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) + + addcs r2, r2, r4 ; if (bit) lowvalue += split + subcs r4, r5, r4 ; if (bit) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. 
This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero_ev ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set_ev + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start_ev +token_zero_while_loop_ev + mov r9, #0 + strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +token_zero_while_start_ev + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop_ev + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r9, [r7, r4] ; w->buffer[x] + add r9, r9, #1 + strb r9, [r7, r4] ; w->buffer[x] + 1 +token_high_bit_not_set_ev + rsb r4, r6, #24 ; 24-offset + ldr r9, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r9, r4] ; w->buffer[w->pos++] + +token_count_lt_zero_ev + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r10, r10, #1 + bne encode_value_loop + + str r2, [r0, #vp9_writer_lowvalue] + str r5, [r0, #vp9_writer_range] + str r3, [r0, #vp9_writer_count] + pop {r4-r11, pc} + ENDP + + END diff --cc vp9/encoder/arm/armv5te/vp8_packtokens_armv5.asm index bf299770b,000000000..9ccbaa6c1 mode 100644,000000..100644 --- a/vp9/encoder/arm/armv5te/vp8_packtokens_armv5.asm +++ b/vp9/encoder/arm/armv5te/vp8_packtokens_armv5.asm @@@ -1,291 -1,0 +1,291 @@@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
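The ARMv5 routines above translate the VP8 boolean encoder line for line, with clz doing the range normalization. One encode step, written out as the C that the asm comments describe (a sketch; vp9_writer_sketch and the clz intrinsic are illustrative, and prob is 128 for the flush bits written by vp8_stop_encode):

    typedef struct {
        unsigned lowvalue, range;
        int count, pos;
        unsigned char *buffer;
    } vp9_writer_sketch;

    static void encode_bool(vp9_writer_sketch *w, int bit, int prob) {
        unsigned split = 1 + (((w->range - 1) * prob) >> 8);
        int shift, offset, x;

        if (bit) { w->lowvalue += split; w->range -= split; }
        else     { w->range = split; }

        shift = __builtin_clz(w->range) - 24;   /* renormalize */
        w->range <<= shift;
        w->count += shift;
        if (w->count >= 0) {                    /* a whole byte is ready */
            offset = shift - w->count;
            if ((w->lowvalue << (offset - 1)) & 0x80000000) {
                x = w->pos - 1;                 /* carry ripples back    */
                while (x >= 0 && w->buffer[x] == 0xff)
                    w->buffer[x--] = 0;
                w->buffer[x] += 1;              /* through any 0xff run  */
            }
            w->buffer[w->pos++] = (w->lowvalue >> (24 - offset)) & 0xff;
            w->lowvalue = (w->lowvalue << offset) & 0xffffff;
            shift = w->count;
            w->count -= 8;
        }
        w->lowvalue <<= shift;
    }

vp8_encode_value simply feeds the bits of a fixed-width value through this step with prob = 128, most significant bit first.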
+; + + + EXPORT |vp8cx_pack_tokens_armv5| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp9_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE, READONLY + +; r0 vp9_writer *w +; r1 const TOKENEXTRA *p +; r2 int xcount +; r3 vp8_coef_encodings +; s0 vp8_extra_bits +; s1 vp8_coef_tree +|vp8cx_pack_tokens_armv5| PROC + push {r4-r11, lr} + + ; Add size of xcount * sizeof (TOKENEXTRA) to get stop + ; sizeof (TOKENEXTRA) is 8 + sub sp, sp, #12 + add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA) + str r2, [sp, #0] + str r3, [sp, #8] ; save vp8_coef_encodings + ldr r2, [r0, #vp9_writer_lowvalue] + ldr r5, [r0, #vp9_writer_range] + ldr r3, [r0, #vp9_writer_count] + b check_p_lt_stop + +while_p_lt_stop + ldrb r6, [r1, #tokenextra_token] ; t + ldr r4, [sp, #8] ; vp8_coef_encodings + mov lr, #0 + add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t + ldr r9, [r1, #tokenextra_context_tree] ; pp + + ldrb r7, [r1, #tokenextra_skip_eob_node] + + ldr r6, [r4, #vp9_token_value] ; v + ldr r8, [r4, #vp9_token_len] ; n + + ; vp8 specific skip_eob_node + cmp r7, #0 + movne lr, #2 ; i = 2 + subne r8, r8, #1 ; --n + + rsb r4, r8, #32 ; 32-n + ldr r10, [sp, #52] ; vp8_coef_tree + + ; v is kept in r12 during the token pack loop + lsl r12, r6, r4 ; r12 = v << 32 - n + +; loop start +token_loop + ldrb r4, [r9, lr, asr #1] ; pp [i>>1] + sub r7, r5, #1 ; range-1 + + ; Decisions are made based on the bit value shifted + ; off of v, so set a flag here based on this. + ; This value is refered to as "bb" + lsls r12, r12, #1 ; bb = v >> n + mul r6, r4, r7 ; ((range-1) * pp[i>>1])) + + ; bb can only be 0 or 1. So only execute this statement + ; if bb == 1, otherwise it will act like i + 0 + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] + add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start +token_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 +token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r10, r4] ; w->buffer[w->pos++] + + ; r10 is used earlier in the loop, but r10 is used as + ; temp variable here. 
So after r10 is used, reload + ; vp8_coef_tree_dcd into r10 + ldr r10, [sp, #52] ; vp8_coef_tree + +token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r8, r8, #1 ; --n + bne token_loop + + ldrb r6, [r1, #tokenextra_token] ; t + ldr r7, [sp, #48] ; vp8_extra_bits + ; Add t * sizeof (vp9_extra_bit_struct) to get the desired + ; element. Here vp9_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t + + ldr r4, [r12, #vp9_extra_bit_struct_base_val] + cmp r4, #0 + beq skip_extra_bits + +; if( b->base_val) + ldr r8, [r12, #vp9_extra_bit_struct_len] ; L + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra + cmp r8, #0 ; if( L) + beq no_extra_bits + + ldr r9, [r12, #vp9_extra_bit_struct_prob] + asr r7, lr, #1 ; v=e>>1 + + ldr r10, [r12, #vp9_extra_bit_struct_tree] + str r10, [sp, #4] ; b->tree + + rsb r4, r8, #32 + lsl r12, r7, r4 + + mov lr, #0 ; i = 0 + +extra_bits_loop + ldrb r4, [r9, lr, asr #1] ; pp[i>>1] + sub r7, r5, #1 ; range-1 + lsls r12, r12, #1 ; v >> n + mul r6, r4, r7 ; (range-1) * pp[i>>1] + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = b->tree[i+bb] + add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + clz r6, r4 + sub r6, r6, #24 + + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi extra_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset= shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl extra_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos - 1 + b extra_zero_while_start +extra_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +extra_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq extra_zero_while_loop + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r10, [r7, r4] + add r10, r10, #1 + strb r10, [r7, r4] +extra_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) + ldr r10, [sp, #4] ; b->tree +extra_count_lt_zero + lsl r2, r2, r6 + + subs r8, r8, #1 ; --n + bne extra_bits_loop ; while (n) + +no_extra_bits + ldr lr, [r1, #4] ; e = p->Extra + add r4, r5, #1 ; range + 1 + tst lr, #1 + lsr r4, r4, #1 ; split = (range + 1) >> 1 + addne r2, r2, r4 ; lowvalue += split + subne r4, r5, r4 ; range = range-split + tst r2, #0x80000000 ; lowvalue & 0x80000000 + lsl r5, r4, #1 ; range <<= 1 + beq end_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] + mov r7, #0 + sub r4, r4, #1 + b end_zero_while_start +end_zero_while_loop + strb r7, [r6, r4] + sub r4, r4, #1 ; x-- +end_zero_while_start + cmp r4, #0 + ldrge r6, [r0, #vp9_writer_buffer] + ldrb r12, [r6, r4] + cmpge r12, #0xff + beq end_zero_while_loop + + ldr r6, [r0, #vp9_writer_buffer] + ldrb r7, [r6, r4] + add r7, r7, #1 + strb r7, [r6, r4] +end_high_bit_not_set + adds r3, r3, #1 ; ++count + lsl r2, r2, #1 ; lowvalue <<= 1 + bne end_count_zero + + ldr r4, [r0, #vp9_writer_pos] + mvn r3, #7 + ldr r7, [r0, #vp9_writer_buffer] + lsr r6, r2, #24 ; lowvalue 
>> 24
+    add r12, r4, #1             ; w->pos++
+    bic r2, r2, #0xff000000     ; lowvalue &= 0xffffff
+    str r12, [r0, #0x10]
+    strb r6, [r7, r4]
+end_count_zero
+skip_extra_bits
+    add r1, r1, #TOKENEXTRA_SZ  ; ++p
+check_p_lt_stop
+    ldr r4, [sp, #0]            ; stop
+    cmp r1, r4                  ; while( p < stop)
+    bcc while_p_lt_stop
+
+    str r2, [r0, #vp9_writer_lowvalue]
+    str r5, [r0, #vp9_writer_range]
+    str r3, [r0, #vp9_writer_count]
+    add sp, sp, #12
+    pop {r4-r11, pc}
+    ENDP
+
+    END
diff --cc vp9/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index a1c647d6c,000000000..0938ce1a3
mode 100644,000000..100644
--- a/vp9/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ b/vp9/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@@ -1,327 -1,0 +1,327 @@@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT |vp8cx_pack_mb_row_tokens_armv5|
+
- INCLUDE asm_enc_offsets.asm
++ INCLUDE vp9_asm_enc_offsets.asm
+
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA |.text|, CODE, READONLY
+
+; r0 VP8_COMP *cpi
+; r1 vp9_writer *w
+; r2 vp8_coef_encodings
+; r3 vp8_extra_bits
+; s0 vp8_coef_tree
+
+|vp8cx_pack_mb_row_tokens_armv5| PROC
+    push {r4-r11, lr}
+    sub sp, sp, #24
+
+    ; Compute address of cpi->common.mb_rows
+    ldr r4, _VP8_COMP_common_
+    ldr r6, _VP8_COMMON_MBrows_
+    add r4, r0, r4
+
+    ldr r5, [r4, r6]            ; load up mb_rows
+
+    str r2, [sp, #20]           ; save vp8_coef_encodings
+    str r5, [sp, #12]           ; save mb_rows
+    str r3, [sp, #8]            ; save vp8_extra_bits
+
+    ldr r4, _VP8_COMP_tplist_
+    add r4, r0, r4
+    ldr r7, [r4, #0]            ; dereference cpi->tp_list
+
+    mov r0, r1                  ; keep same as other loops
+
+    ldr r2, [r0, #vp9_writer_lowvalue]
+    ldr r5, [r0, #vp9_writer_range]
+    ldr r3, [r0, #vp9_writer_count]
+
+mb_row_loop
+
+    ldr r1, [r7, #tokenlist_start]
+    ldr r9, [r7, #tokenlist_stop]
+    str r9, [sp, #0]            ; save stop for later comparison
+    str r7, [sp, #16]           ; tokenlist address for next time
+
+    b check_p_lt_stop
+
+    ; actual work gets done here!
+
+while_p_lt_stop
+    ldrb r6, [r1, #tokenextra_token]        ; t
+    ldr r4, [sp, #20]           ; vp8_coef_encodings
+    mov lr, #0
+    add r4, r4, r6, lsl #3      ; a = vp8_coef_encodings + t
+    ldr r9, [r1, #tokenextra_context_tree]  ; pp
+
+    ldrb r7, [r1, #tokenextra_skip_eob_node]
+
+    ldr r6, [r4, #vp9_token_value]  ; v
+    ldr r8, [r4, #vp9_token_len]    ; n
+
+    ; vp8 specific skip_eob_node
+    cmp r7, #0
+    movne lr, #2                ; i = 2
+    subne r8, r8, #1            ; --n
+
+    rsb r4, r8, #32             ; 32-n
+    ldr r10, [sp, #60]          ; vp8_coef_tree
+
+    ; v is kept in r12 during the token pack loop
+    lsl r12, r6, r4             ; r12 = v << 32 - n
+
+; loop start
+token_loop
+    ldrb r4, [r9, lr, asr #1]   ; pp [i>>1]
+    sub r7, r5, #1              ; range-1
+
+    ; Decisions are made based on the bit value shifted
+    ; off of v, so set a flag here based on this.
+    ; This value is referred to as "bb"
+    lsls r12, r12, #1           ; bb = v >> n
+    mul r6, r4, r7              ; ((range-1) * pp[i>>1])
+
+    ; bb can only be 0 or 1.
So only execute this statement + ; if bb == 1, otherwise it will act like i + 0 + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] + add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start +token_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 +token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r10, r4] ; w->buffer[w->pos++] + + ; r10 is used earlier in the loop, but r10 is used as + ; temp variable here. So after r10 is used, reload + ; vp8_coef_tree_dcd into r10 + ldr r10, [sp, #60] ; vp8_coef_tree + +token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r8, r8, #1 ; --n + bne token_loop + + ldrb r6, [r1, #tokenextra_token] ; t + ldr r7, [sp, #8] ; vp8_extra_bits + ; Add t * sizeof (vp9_extra_bit_struct) to get the desired + ; element. 
Here vp9_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t + + ldr r4, [r12, #vp9_extra_bit_struct_base_val] + cmp r4, #0 + beq skip_extra_bits + +; if( b->base_val) + ldr r8, [r12, #vp9_extra_bit_struct_len] ; L + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra + cmp r8, #0 ; if( L) + beq no_extra_bits + + ldr r9, [r12, #vp9_extra_bit_struct_prob] + asr r7, lr, #1 ; v=e>>1 + + ldr r10, [r12, #vp9_extra_bit_struct_tree] + str r10, [sp, #4] ; b->tree + + rsb r4, r8, #32 + lsl r12, r7, r4 + + mov lr, #0 ; i = 0 + +extra_bits_loop + ldrb r4, [r9, lr, asr #1] ; pp[i>>1] + sub r7, r5, #1 ; range-1 + lsls r12, r12, #1 ; v >> n + mul r6, r4, r7 ; (range-1) * pp[i>>1] + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = b->tree[i+bb] + add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + clz r6, r4 + sub r6, r6, #24 + + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi extra_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset= shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl extra_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos - 1 + b extra_zero_while_start +extra_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +extra_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq extra_zero_while_loop + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r10, [r7, r4] + add r10, r10, #1 + strb r10, [r7, r4] +extra_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) + ldr r10, [sp, #4] ; b->tree +extra_count_lt_zero + lsl r2, r2, r6 + + subs r8, r8, #1 ; --n + bne extra_bits_loop ; while (n) + +no_extra_bits + ldr lr, [r1, #4] ; e = p->Extra + add r4, r5, #1 ; range + 1 + tst lr, #1 + lsr r4, r4, #1 ; split = (range + 1) >> 1 + addne r2, r2, r4 ; lowvalue += split + subne r4, r5, r4 ; range = range-split + tst r2, #0x80000000 ; lowvalue & 0x80000000 + lsl r5, r4, #1 ; range <<= 1 + beq end_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] + mov r7, #0 + sub r4, r4, #1 + b end_zero_while_start +end_zero_while_loop + strb r7, [r6, r4] + sub r4, r4, #1 ; x-- +end_zero_while_start + cmp r4, #0 + ldrge r6, [r0, #vp9_writer_buffer] + ldrb r12, [r6, r4] + cmpge r12, #0xff + beq end_zero_while_loop + + ldr r6, [r0, #vp9_writer_buffer] + ldrb r7, [r6, r4] + add r7, r7, #1 + strb r7, [r6, r4] +end_high_bit_not_set + adds r3, r3, #1 ; ++count + lsl r2, r2, #1 ; lowvalue <<= 1 + bne end_count_zero + + ldr r4, [r0, #vp9_writer_pos] + mvn r3, #7 + ldr r7, [r0, #vp9_writer_buffer] + lsr r6, r2, #24 ; lowvalue >> 24 + add r12, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r12, [r0, #0x10] + strb r6, [r7, r4] +end_count_zero +skip_extra_bits + add r1, r1, #TOKENEXTRA_SZ ; ++p +check_p_lt_stop + ldr r4, [sp, #0] ; stop + cmp r1, r4 ; while( p < stop) + bcc while_p_lt_stop + + ldr r6, [sp, #12] ; mb_rows + ldr r7, [sp, #16] ; 
tokenlist address + subs r6, r6, #1 + add r7, r7, #TOKENLIST_SZ ; next element in the array + str r6, [sp, #12] + bne mb_row_loop + + str r2, [r0, #vp9_writer_lowvalue] + str r5, [r0, #vp9_writer_range] + str r3, [r0, #vp9_writer_count] + add sp, sp, #24 + pop {r4-r11, pc} + ENDP + +_VP8_COMP_common_ + DCD vp8_comp_common +_VP8_COMMON_MBrows_ + DCD vp8_common_mb_rows +_VP8_COMP_tplist_ + DCD vp8_comp_tplist + + END diff --cc vp9/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm index 86c2feb4a,000000000..4611b407d mode 100644,000000..100644 --- a/vp9/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm +++ b/vp9/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm @@@ -1,465 -1,0 +1,465 @@@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8cx_pack_tokens_into_partitions_armv5| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp9_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA |.text|, CODE, READONLY + +; r0 VP8_COMP *cpi +; r1 unsigned char *cx_data +; r2 int num_part +; r3 *size +; s0 vp8_coef_encodings +; s1 vp8_extra_bits, +; s2 const vp9_tree_index *, + +|vp8cx_pack_tokens_into_partitions_armv5| PROC + push {r4-r11, lr} + sub sp, sp, #44 + + ; Compute address of cpi->common.mb_rows + ldr r4, _VP8_COMP_common_ + ldr r6, _VP8_COMMON_MBrows_ + add r4, r0, r4 + + ldr r5, [r4, r6] ; load up mb_rows + + str r5, [sp, #36] ; save mb_rows + str r1, [sp, #24] ; save cx_data + str r2, [sp, #20] ; save num_part + str r3, [sp, #8] ; save *size + + ; *size = 3*(num_part -1 ); + sub r2, r2, #1 ; num_part - 1 + add r2, r2, r2, lsl #1 ; 3*(num_part - 1) + str r2, [r3] + + add r2, r2, r1 ; cx_data + *size + str r2, [sp, #40] ; ptr + + ldr r4, _VP8_COMP_tplist_ + add r4, r0, r4 + ldr r7, [r4, #0] ; dereference cpi->tp_list + str r7, [sp, #32] ; store start of cpi->tp_list + + ldr r11, _VP8_COMP_bc2_ ; load up vp9_writer out of cpi + add r0, r0, r11 + + mov r11, #0 + str r11, [sp, #28] ; i + +numparts_loop + ldr r10, [sp, #40] ; ptr + ldr r5, [sp, #36] ; move mb_rows to the counting section + sub r5, r5, r11 ; move start point with each partition + ; mb_rows starts at i + str r5, [sp, #12] + + ; Reset all of the VP8 Writer data for each partition that + ; is processed. + ; start_encode + mov r2, #0 ; vp9_writer_lowvalue + mov r5, #255 ; vp9_writer_range + mvn r3, #23 ; vp9_writer_count + + str r2, [r0, #vp9_writer_value] + str r2, [r0, #vp9_writer_pos] + str r10, [r0, #vp9_writer_buffer] + +mb_row_loop + + ldr r1, [r7, #tokenlist_start] + ldr r9, [r7, #tokenlist_stop] + str r9, [sp, #0] ; save stop for later comparison + str r7, [sp, #16] ; tokenlist address for next time + + b check_p_lt_stop + + ; actual work gets done here! 
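+ ; For orientation before the loop: this is a register-level
+ ; rendering of one VP8 boolean-encoder step per tree bit.
+ ; Roughly, each iteration of token_loop computes the
+ ; following (a hedged C sketch of the flow, not code copied
+ ; from the tree; "bb" is the tree bit being written):
+ ;
+ ;   split = 1 + (((range - 1) * pp[i >> 1]) >> 8);
+ ;   if (bb) { lowvalue += split; range -= split; }
+ ;   else    { range = split; }
+ ;   shift = clz(range) - 24;           /* renormalize */
+ ;   range <<= shift; count += shift;
+ ;   if (count >= 0) {                  /* flush one byte */
+ ;     offset = shift - count;
+ ;     /* propagate carry through any 0xff bytes first */
+ ;     w->buffer[w->pos++] = lowvalue >> (24 - offset);
+ ;     lowvalue = (lowvalue << offset) & 0xffffff;
+ ;     shift = count; count -= 8;
+ ;   }
+ ;   lowvalue <<= shift;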
+ +while_p_lt_stop + ldrb r6, [r1, #tokenextra_token] ; t + ldr r4, [sp, #80] ; vp8_coef_encodings + mov lr, #0 + add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t + ldr r9, [r1, #tokenextra_context_tree] ; pp + + ldrb r7, [r1, #tokenextra_skip_eob_node] + + ldr r6, [r4, #vp9_token_value] ; v + ldr r8, [r4, #vp9_token_len] ; n + + ; vp8 specific skip_eob_node + cmp r7, #0 + movne lr, #2 ; i = 2 + subne r8, r8, #1 ; --n + + rsb r4, r8, #32 ; 32-n + ldr r10, [sp, #88] ; vp8_coef_tree + + ; v is kept in r12 during the token pack loop + lsl r12, r6, r4 ; r12 = v << 32 - n + +; loop start +token_loop + ldrb r4, [r9, lr, asr #1] ; pp[i>>1] + sub r7, r5, #1 ; range-1 + + ; Decisions are made based on the bit value shifted + ; off of v, so set a flag here based on this. + ; This value is referred to as "bb" + lsls r12, r12, #1 ; bb = v >> n + mul r6, r4, r7 ; (range-1) * pp[i>>1] + + ; bb can only be 0 or 1. So only execute this statement + ; if bb == 1, otherwise it will act like i + 0 + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb] + add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start +token_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] = (unsigned char)0 + sub r4, r4, #1 ; x-- +token_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 +token_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r10, r4] ; w->buffer[w->pos++] + + ; r10 is used earlier in the loop, but r10 is used as + ; temp variable here. So after r10 is used, reload + ; vp8_coef_tree_dcd into r10 + ldr r10, [sp, #88] ; vp8_coef_tree + +token_count_lt_zero + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r8, r8, #1 ; --n + bne token_loop + + ldrb r6, [r1, #tokenextra_token] ; t + ldr r7, [sp, #84] ; vp8_extra_bits + ; Add t * sizeof (vp9_extra_bit_struct) to get the desired + ; element.
Here vp9_extra_bit_struct == 16 + add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t + + ldr r4, [r12, #vp9_extra_bit_struct_base_val] + cmp r4, #0 + beq skip_extra_bits + +; if( b->base_val) + ldr r8, [r12, #vp9_extra_bit_struct_len] ; L + ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra + cmp r8, #0 ; if( L) + beq no_extra_bits + + ldr r9, [r12, #vp9_extra_bit_struct_prob] + asr r7, lr, #1 ; v=e>>1 + + ldr r10, [r12, #vp9_extra_bit_struct_tree] + str r10, [sp, #4] ; b->tree + + rsb r4, r8, #32 + lsl r12, r7, r4 + + mov lr, #0 ; i = 0 + +extra_bits_loop + ldrb r4, [r9, lr, asr #1] ; pp[i>>1] + sub r7, r5, #1 ; range-1 + lsls r12, r12, #1 ; v >> n + mul r6, r4, r7 ; (range-1) * pp[i>>1] + addcs lr, lr, #1 ; i + bb + + mov r7, #1 + ldrsb lr, [r10, lr] ; i = b->tree[i+bb] + add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8) + + addcs r2, r2, r4 ; if (bb) lowvalue += split + subcs r4, r5, r4 ; if (bb) range = range-split + + clz r6, r4 + sub r6, r6, #24 + + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi extra_count_lt_zero ; if(count >= 0) + + sub r6, r6, r3 ; offset= shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl extra_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos - 1 + b extra_zero_while_start +extra_zero_while_loop + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +extra_zero_while_start + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq extra_zero_while_loop + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r10, [r7, r4] + add r10, r10, #1 + strb r10, [r7, r4] +extra_high_bit_not_set + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset)) + ldr r10, [sp, #4] ; b->tree +extra_count_lt_zero + lsl r2, r2, r6 + + subs r8, r8, #1 ; --n + bne extra_bits_loop ; while (n) + +no_extra_bits + ldr lr, [r1, #4] ; e = p->Extra + add r4, r5, #1 ; range + 1 + tst lr, #1 + lsr r4, r4, #1 ; split = (range + 1) >> 1 + addne r2, r2, r4 ; lowvalue += split + subne r4, r5, r4 ; range = range-split + tst r2, #0x80000000 ; lowvalue & 0x80000000 + lsl r5, r4, #1 ; range <<= 1 + beq end_high_bit_not_set + + ldr r4, [r0, #vp9_writer_pos] + mov r7, #0 + sub r4, r4, #1 + b end_zero_while_start +end_zero_while_loop + strb r7, [r6, r4] + sub r4, r4, #1 ; x-- +end_zero_while_start + cmp r4, #0 + ldrge r6, [r0, #vp9_writer_buffer] + ldrb r12, [r6, r4] + cmpge r12, #0xff + beq end_zero_while_loop + + ldr r6, [r0, #vp9_writer_buffer] + ldrb r7, [r6, r4] + add r7, r7, #1 + strb r7, [r6, r4] +end_high_bit_not_set + adds r3, r3, #1 ; ++count + lsl r2, r2, #1 ; lowvalue <<= 1 + bne end_count_zero + + ldr r4, [r0, #vp9_writer_pos] + mvn r3, #7 + ldr r7, [r0, #vp9_writer_buffer] + lsr r6, r2, #24 ; lowvalue >> 24 + add r12, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r12, [r0, #0x10] + strb r6, [r7, r4] +end_count_zero +skip_extra_bits + add r1, r1, #TOKENEXTRA_SZ ; ++p +check_p_lt_stop + ldr r4, [sp, #0] ; stop + cmp r1, r4 ; while( p < stop) + bcc while_p_lt_stop + + ldr r10, [sp, #20] ; num_parts + mov r1, #TOKENLIST_SZ + 
mul r1, r10, r1 + + ldr r6, [sp, #12] ; mb_rows + ldr r7, [sp, #16] ; tokenlist address + subs r6, r6, r10 + add r7, r7, r1 ; next element in the array + str r6, [sp, #12] + bgt mb_row_loop + + mov r12, #32 + +stop_encode_loop + sub r7, r5, #1 ; range-1 + + mov r4, r7, lsl #7 ; ((range-1) * 128) + + mov r7, #1 + add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8) + + ; Counting the leading zeros is used to normalize range. + clz r6, r4 + sub r6, r6, #24 ; shift + + ; Flag is set on the sum of count. This flag is used later + ; to determine if count >= 0 + adds r3, r3, r6 ; count += shift + lsl r5, r4, r6 ; range <<= shift + bmi token_count_lt_zero_se ; if(count >= 0) + + sub r6, r6, r3 ; offset = shift - count + sub r4, r6, #1 ; offset-1 + lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 ) + bpl token_high_bit_not_set_se + + ldr r4, [r0, #vp9_writer_pos] ; x + sub r4, r4, #1 ; x = w->pos-1 + b token_zero_while_start_se +token_zero_while_loop_se + mov r10, #0 + strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0 + sub r4, r4, #1 ; x-- +token_zero_while_start_se + cmp r4, #0 + ldrge r7, [r0, #vp9_writer_buffer] + ldrb r11, [r7, r4] + cmpge r11, #0xff + beq token_zero_while_loop_se + + ldr r7, [r0, #vp9_writer_buffer] + ldrb r10, [r7, r4] ; w->buffer[x] + add r10, r10, #1 + strb r10, [r7, r4] ; w->buffer[x] + 1 +token_high_bit_not_set_se + rsb r4, r6, #24 ; 24-offset + ldr r10, [r0, #vp9_writer_buffer] + lsr r7, r2, r4 ; lowvalue >> (24-offset) + ldr r4, [r0, #vp9_writer_pos] ; w->pos + lsl r2, r2, r6 ; lowvalue <<= offset + mov r6, r3 ; shift = count + add r11, r4, #1 ; w->pos++ + bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff + str r11, [r0, #vp9_writer_pos] + sub r3, r3, #8 ; count -= 8 + strb r7, [r10, r4] ; w->buffer[w->pos++] + +token_count_lt_zero_se + lsl r2, r2, r6 ; lowvalue <<= shift + + subs r12, r12, #1 + bne stop_encode_loop + + ldr r10, [sp, #8] ; *size + ldr r11, [r10] + ldr r4, [r0, #vp9_writer_pos] ; w->pos + add r11, r11, r4 ; *size += w->pos + str r11, [r10] + + ldr r9, [sp, #20] ; num_parts + sub r9, r9, #1 + ldr r10, [sp, #28] ; i + cmp r10, r9 ; if(i<(num_part - 1)) + bge skip_write_partition + + ldr r12, [sp, #40] ; ptr + add r12, r12, r4 ; ptr += w->pos + str r12, [sp, #40] + + ldr r9, [sp, #24] ; cx_data + mov r8, r4, asr #8 + strb r4, [r9, #0] + strb r8, [r9, #1] + mov r4, r4, asr #16 + strb r4, [r9, #2] + + add r9, r9, #3 ; cx_data += 3 + str r9, [sp, #24] + +skip_write_partition + + ldr r11, [sp, #28] ; i + ldr r10, [sp, #20] ; num_parts + + add r11, r11, #1 ; i++ + str r11, [sp, #28] + + ldr r7, [sp, #32] ; cpi->tp_list[i] + mov r1, #TOKENLIST_SZ + add r7, r7, r1 ; next element in cpi->tp_list + str r7, [sp, #32] ; cpi->tp_list[i+1] + + cmp r10, r11 + bgt numparts_loop + + + add sp, sp, #44 + pop {r4-r11, pc} + ENDP + +_VP8_COMP_common_ + DCD vp8_comp_common +_VP8_COMMON_MBrows_ + DCD vp8_common_mb_rows +_VP8_COMP_tplist_ + DCD vp8_comp_tplist +_VP8_COMP_bc2_ + DCD vp8_comp_bc2 + + END diff --cc vp9/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm index ae2f6030d,000000000..bca74391a mode 100644,000000..100644 --- a/vp9/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm +++ b/vp9/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm @@@ -1,224 -1,0 +1,224 @@@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. 
All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8_fast_quantize_b_armv6| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp9_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 BLOCK *b +; r1 BLOCKD *d +|vp8_fast_quantize_b_armv6| PROC + stmfd sp!, {r1, r4-r11, lr} + + ldr r3, [r0, #vp8_block_coeff] ; coeff + ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast + ldr r5, [r0, #vp8_block_round] ; round + ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff + ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff + ldr r8, [r1, #vp8_blockd_dequant] ; dequant + + ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction + ; is used to update the counter so that + ; it can be used to mark nonzero + ; quantized coefficient pairs. + + mov r1, #0 ; flags for quantized coeffs + + ; PART 1: quantization and dequantization loop +loop + ldr r9, [r3], #4 ; [z1 | z0] + ldr r10, [r5], #4 ; [r1 | r0] + ldr r11, [r4], #4 ; [q1 | q0] + + ssat16 lr, #1, r9 ; [sz1 | sz0] + eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0] + ssub16 r9, r9, lr ; x = (z ^ sz) - sz + sadd16 r9, r9, r10 ; [x1+r1 | x0+r0] + + ldr r12, [r3], #4 ; [z3 | z2] + + smulbb r0, r9, r11 ; [(x0+r0)*q0] + smultt r9, r9, r11 ; [(x1+r1)*q1] + + ldr r10, [r5], #4 ; [r3 | r2] + + ssat16 r11, #1, r12 ; [sz3 | sz2] + eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2] + pkhtb r0, r9, r0, asr #16 ; [y1 | y0] + ldr r9, [r4], #4 ; [q3 | q2] + ssub16 r12, r12, r11 ; x = (z ^ sz) - sz + + sadd16 r12, r12, r10 ; [x3+r3 | x2+r2] + + eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)] + + smulbb r10, r12, r9 ; [(x2+r2)*q2] + smultt r12, r12, r9 ; [(x3+r3)*q3] + + ssub16 r0, r0, lr ; x = (y ^ sz) - sz + + cmp r0, #0 ; check if zero + orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs + + str r0, [r6], #4 ; *qcoeff++ = x + ldr r9, [r8], #4 ; [dq1 | dq0] + + pkhtb r10, r12, r10, asr #16 ; [y3 | y2] + eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)] + ssub16 r10, r10, r11 ; x = (y ^ sz) - sz + + cmp r10, #0 ; check if zero + orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs + + str r10, [r6], #4 ; *qcoeff++ = x + ldr r11, [r8], #4 ; [dq3 | dq2] + + smulbb r12, r0, r9 ; [x0*dq0] + smultt r0, r0, r9 ; [x1*dq1] + + smulbb r9, r10, r11 ; [x2*dq2] + smultt r10, r10, r11 ; [x3*dq3] + + lsls r2, r2, #2 ; update loop counter + strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0] + strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1] + strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2] + strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3] + add r7, r7, #8 ; dqcoeff += 8 + bne loop + + ; PART 2: check position for eob... + mov lr, #0 ; init eob + cmp r1, #0 ; coeffs after quantization? 
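+ ; r1 now carries one flag bit per pair of quantized
+ ; coefficients that came out nonzero in PART 1 (set from the
+ ; shifting loop counter). The tst/bne ladder below uses the
+ ; flags to probe qcoeff in reverse zig-zag order, so eob is
+ ; found without rescanning all 16 values; conceptually (a
+ ; hedged C sketch, scan table implied by the rc/i notes on
+ ; the loads below):
+ ;
+ ;   eob = 0;
+ ;   for (i = 15; i >= 0; i--) {
+ ;     rc = zig_zag1d[i];
+ ;     if (qcoeff[rc]) { eob = i + 1; break; }
+ ;   }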
+ ldr r11, [sp, #0] ; restore BLOCKD pointer + beq end ; skip eob calculations if all zero + + ldr r0, [r11, #vp8_blockd_qcoeff] + + ; check shortcut for nonzero qcoeffs + tst r1, #0x80 + bne quant_coeff_15_14 + tst r1, #0x20 + bne quant_coeff_13_11 + tst r1, #0x8 + bne quant_coeff_12_7 + tst r1, #0x40 + bne quant_coeff_10_9 + tst r1, #0x10 + bne quant_coeff_8_3 + tst r1, #0x2 + bne quant_coeff_6_5 + tst r1, #0x4 + bne quant_coeff_4_2 + b quant_coeff_1_0 + +quant_coeff_15_14 + ldrh r2, [r0, #30] ; rc=15, i=15 + mov lr, #16 + cmp r2, #0 + bne end + + ldrh r3, [r0, #28] ; rc=14, i=14 + mov lr, #15 + cmp r3, #0 + bne end + +quant_coeff_13_11 + ldrh r2, [r0, #22] ; rc=11, i=13 + mov lr, #14 + cmp r2, #0 + bne end + +quant_coeff_12_7 + ldrh r3, [r0, #14] ; rc=7, i=12 + mov lr, #13 + cmp r3, #0 + bne end + + ldrh r2, [r0, #20] ; rc=10, i=11 + mov lr, #12 + cmp r2, #0 + bne end + +quant_coeff_10_9 + ldrh r3, [r0, #26] ; rc=13, i=10 + mov lr, #11 + cmp r3, #0 + bne end + + ldrh r2, [r0, #24] ; rc=12, i=9 + mov lr, #10 + cmp r2, #0 + bne end + +quant_coeff_8_3 + ldrh r3, [r0, #18] ; rc=9, i=8 + mov lr, #9 + cmp r3, #0 + bne end + + ldrh r2, [r0, #12] ; rc=6, i=7 + mov lr, #8 + cmp r2, #0 + bne end + +quant_coeff_6_5 + ldrh r3, [r0, #6] ; rc=3, i=6 + mov lr, #7 + cmp r3, #0 + bne end + + ldrh r2, [r0, #4] ; rc=2, i=5 + mov lr, #6 + cmp r2, #0 + bne end + +quant_coeff_4_2 + ldrh r3, [r0, #10] ; rc=5, i=4 + mov lr, #5 + cmp r3, #0 + bne end + + ldrh r2, [r0, #16] ; rc=8, i=3 + mov lr, #4 + cmp r2, #0 + bne end + + ldrh r3, [r0, #8] ; rc=4, i=2 + mov lr, #3 + cmp r3, #0 + bne end + +quant_coeff_1_0 + ldrh r2, [r0, #2] ; rc=1, i=1 + mov lr, #2 + cmp r2, #0 + bne end + + mov lr, #1 ; rc=0, i=0 + +end + str lr, [r11, #vp8_blockd_eob] + ldmfd sp!, {r1, r4-r11, pc} + + ENDP + +loop_count + DCD 0x1000000 + + END + diff --cc vp9/encoder/arm/armv6/vp8_subtract_armv6.asm index 0ca74387b,000000000..bb466c4e9 mode 100644,000000..100644 --- a/vp9/encoder/arm/armv6/vp8_subtract_armv6.asm +++ b/vp9/encoder/arm/armv6/vp8_subtract_armv6.asm @@@ -1,265 -1,0 +1,265 @@@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_subtract_mby_armv6| + EXPORT |vp8_subtract_mbuv_armv6| + EXPORT |vp8_subtract_b_armv6| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp9_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 BLOCK *be +; r1 BLOCKD *bd +; r2 int pitch +|vp8_subtract_b_armv6| PROC + + stmfd sp!, {r4-r9} + + ldr r4, [r0, #vp8_block_base_src] + ldr r5, [r0, #vp8_block_src] + ldr r6, [r0, #vp8_block_src_diff] + + ldr r3, [r4] + ldr r7, [r0, #vp8_block_src_stride] + add r3, r3, r5 ; src = *base_src + src + ldr r8, [r1, #vp8_blockd_predictor] + + mov r9, #4 ; loop count + +loop_block + + ldr r0, [r3], r7 ; src + ldr r1, [r8], r2 ; pred + + uxtb16 r4, r0 ; [s2 | s0] + uxtb16 r5, r1 ; [p2 | p0] + uxtb16 r0, r0, ror #8 ; [s3 | s1] + uxtb16 r1, r1, ror #8 ; [p3 | p1] + + usub16 r4, r4, r5 ; [d2 | d0] + usub16 r5, r0, r1 ; [d3 | d1] + + subs r9, r9, #1 ; decrement loop counter + + pkhbt r0, r4, r5, lsl #16 ; [d1 | d0] + pkhtb r1, r5, r4, asr #16 ; [d3 | d2] + + str r0, [r6, #0] ; diff + str r1, [r6, #4] ; diff + + add r6, r6, r2, lsl #1 ; update diff pointer + bne loop_block + + ldmfd sp!, {r4-r9} + mov pc, lr + + ENDP + + +; r0 short *diff +; r1 unsigned char *usrc +; r2 unsigned char *vsrc +; r3 unsigned char *pred +; stack int stride +|vp8_subtract_mbuv_armv6| PROC + + stmfd sp!, {r4-r12, lr} + + add r0, r0, #512 ; set *diff point to Cb + add r3, r3, #256 ; set *pred point to Cb + + mov r4, #8 ; loop count + ldr r5, [sp, #40] ; stride + + ; Subtract U block +loop_u + ldr r6, [r1] ; src (A) + ldr r7, [r3], #4 ; pred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r1, #4] ; src (B) + ldr r11, [r3], #4 ; pred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + add r1, r1, r5 ; update usrc pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (B) + + bne loop_u + + mov r4, #8 ; loop count + + ; Subtract V block +loop_v + ldr r6, [r2] ; src (A) + ldr r7, [r3], #4 ; pred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r2, #4] ; src (B) + ldr r11, [r3], #4 ; pred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + add r2, r2, r5 ; update vsrc pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (B) + + bne loop_v + + ldmfd sp!, {r4-r12, 
pc} + + ENDP + + +; r0 short *diff +; r1 unsigned char *src +; r2 unsigned char *pred +; r3 int stride +|vp8_subtract_mby_armv6| PROC + + stmfd sp!, {r4-r11} + + mov r4, #16 +loop + ldr r6, [r1] ; src (A) + ldr r7, [r2], #4 ; pred (A) + + uxtb16 r8, r6 ; [s2 | s0] (A) + uxtb16 r9, r7 ; [p2 | p0] (A) + uxtb16 r10, r6, ror #8 ; [s3 | s1] (A) + uxtb16 r11, r7, ror #8 ; [p3 | p1] (A) + + usub16 r6, r8, r9 ; [d2 | d0] (A) + usub16 r7, r10, r11 ; [d3 | d1] (A) + + ldr r10, [r1, #4] ; src (B) + ldr r11, [r2], #4 ; pred (B) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A) + + str r8, [r0], #4 ; diff (A) + uxtb16 r8, r10 ; [s2 | s0] (B) + str r9, [r0], #4 ; diff (A) + + uxtb16 r9, r11 ; [p2 | p0] (B) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (B) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (B) + + usub16 r6, r8, r9 ; [d2 | d0] (B) + usub16 r7, r10, r11 ; [d3 | d1] (B) + + ldr r10, [r1, #8] ; src (C) + ldr r11, [r2], #4 ; pred (C) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B) + + str r8, [r0], #4 ; diff (B) + uxtb16 r8, r10 ; [s2 | s0] (C) + str r9, [r0], #4 ; diff (B) + + uxtb16 r9, r11 ; [p2 | p0] (C) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (C) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (C) + + usub16 r6, r8, r9 ; [d2 | d0] (C) + usub16 r7, r10, r11 ; [d3 | d1] (C) + + ldr r10, [r1, #12] ; src (D) + ldr r11, [r2], #4 ; pred (D) + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C) + + str r8, [r0], #4 ; diff (C) + uxtb16 r8, r10 ; [s2 | s0] (D) + str r9, [r0], #4 ; diff (C) + + uxtb16 r9, r11 ; [p2 | p0] (D) + uxtb16 r10, r10, ror #8 ; [s3 | s1] (D) + uxtb16 r11, r11, ror #8 ; [p3 | p1] (D) + + usub16 r6, r8, r9 ; [d2 | d0] (D) + usub16 r7, r10, r11 ; [d3 | d1] (D) + + add r1, r1, r3 ; update src pointer + + pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D) + pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D) + + str r8, [r0], #4 ; diff (D) + subs r4, r4, #1 ; update loop counter + str r9, [r0], #4 ; diff (D) + + bne loop + + ldmfd sp!, {r4-r11} + mov pc, lr + + ENDP + + END + diff --cc vp9/encoder/arm/neon/fastquantizeb_neon.asm index 259707658,000000000..fa3aff8ac mode 100644,000000..100644 --- a/vp9/encoder/arm/neon/fastquantizeb_neon.asm +++ b/vp9/encoder/arm/neon/fastquantizeb_neon.asm @@@ -1,261 -1,0 +1,261 @@@ +; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_fast_quantize_b_neon| + EXPORT |vp8_fast_quantize_b_pair_neon| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp9_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=4 + +;vp8_fast_quantize_b_pair_neon(BLOCK *b1, BLOCK *b2, BLOCKD *d1, BLOCKD *d2); +|vp8_fast_quantize_b_pair_neon| PROC + + stmfd sp!, {r4-r9} + vstmdb sp!, {q4-q7} + + ldr r4, [r0, #vp8_block_coeff] + ldr r5, [r0, #vp8_block_quant_fast] + ldr r6, [r0, #vp8_block_round] + + vld1.16 {q0, q1}, [r4@128] ; load z + + ldr r7, [r2, #vp8_blockd_qcoeff] + + vabs.s16 q4, q0 ; calculate x = abs(z) + vabs.s16 q5, q1 + + ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative + vshr.s16 q2, q0, #15 ; sz + vshr.s16 q3, q1, #15 + + vld1.s16 {q6, q7}, [r6@128] ; load round_ptr [0-15] + vld1.s16 {q8, q9}, [r5@128] ; load quant_ptr [0-15] + + ldr r4, [r1, #vp8_block_coeff] + + vadd.s16 q4, q6 ; x + Round + vadd.s16 q5, q7 + + vld1.16 {q0, q1}, [r4@128] ; load z2 + + vqdmulh.s16 q4, q8 ; y = ((Round+abs(z)) * Quant) >> 16 + vqdmulh.s16 q5, q9 + + vabs.s16 q10, q0 ; calculate x2 = abs(z_2) + vabs.s16 q11, q1 + vshr.s16 q12, q0, #15 ; sz2 + vshr.s16 q13, q1, #15 + + ;modify data to have its original sign + veor.s16 q4, q2 ; y^sz + veor.s16 q5, q3 + + vadd.s16 q10, q6 ; x2 + Round + vadd.s16 q11, q7 + + ldr r8, [r2, #vp8_blockd_dequant] + + vqdmulh.s16 q10, q8 ; y2 = ((Round+abs(z)) * Quant) >> 16 + vqdmulh.s16 q11, q9 + + vshr.s16 q4, #1 ; right shift 1 after vqdmulh + vshr.s16 q5, #1 + + vld1.s16 {q6, q7}, [r8@128] ;load dequant_ptr[i] + + vsub.s16 q4, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) + vsub.s16 q5, q3 + + vshr.s16 q10, #1 ; right shift 1 after vqdmulh + vshr.s16 q11, #1 + + ldr r9, [r2, #vp8_blockd_dqcoeff] + + veor.s16 q10, q12 ; y2^sz2 + veor.s16 q11, q13 + + vst1.s16 {q4, q5}, [r7] ; store: qcoeff = x1 + + + vsub.s16 q10, q12 ; x2=(y^sz)-sz = (y^sz)-(-1) (2's complement) + vsub.s16 q11, q13 + + ldr r6, [r3, #vp8_blockd_qcoeff] + + vmul.s16 q2, q6, q4 ; x * Dequant + vmul.s16 q3, q7, q5 + + ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table + + vceq.s16 q8, q8 ; set q8 to all 1 + + vst1.s16 {q10, q11}, [r6] ; store: qcoeff = x2 + + vmul.s16 q12, q6, q10 ; x2 * Dequant + vmul.s16 q13, q7, q11 + + vld1.16 {q6, q7}, [r0@128] ; load inverse scan order + + vtst.16 q14, q4, q8 ; now find eob + vtst.16 q15, q5, q8 ; non-zero element is set to all 1 + + vst1.s16 {q2, q3}, [r9] ; store dqcoeff = x * Dequant + + ldr r7, [r3, #vp8_blockd_dqcoeff] + + vand q0, q6, q14 ; get all valid numbers from scan array + vand q1, q7, q15 + + vst1.s16 {q12, q13}, [r7] ; store dqcoeff = x * Dequant + + vtst.16 q2, q10, q8 ; now find eob + vtst.16 q3, q11, q8 ; non-zero element is set to all 1 + + vmax.u16 q0, q0, q1 ; find maximum value in q0, q1 + + vand q10, q6, q2 ; get all valid numbers from scan array + vand q11, q7, q3 + vmax.u16 q10, q10, q11 ; find maximum value in q10, q11 + + vmax.u16 d0, d0, d1 + vmax.u16 d20, d20, d21 + vmovl.u16 q0, d0 + vmovl.u16 q10, d20 + + + vmax.u32 d0, d0, d1 + vmax.u32 d20, d20, d21 + vpmax.u32 d0, d0, d0 + vpmax.u32 d20, d20, d20 + + add r4, r2, #vp8_blockd_eob + add r5, r3, #vp8_blockd_eob + + vst1.32 {d0[0]}, [r4@32] + vst1.32 {d20[0]}, [r5@32] + + vldmia sp!, {q4-q7} + ldmfd sp!, {r4-r9} + bx lr + + ENDP + +;void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) +|vp8_fast_quantize_b_neon| PROC + + stmfd sp!, {r4-r7} + + ldr r3, [r0, #vp8_block_coeff] + ldr r4, [r0, #vp8_block_quant_fast] + ldr r5, [r0, #vp8_block_round] + + vld1.16 
{q0, q1}, [r3@128] ; load z + vorr.s16 q14, q0, q1 ; check if all zero (step 1) + ldr r6, [r1, #vp8_blockd_qcoeff] + ldr r7, [r1, #vp8_blockd_dqcoeff] + vorr.s16 d28, d28, d29 ; check if all zero (step 2) + + vabs.s16 q12, q0 ; calculate x = abs(z) + vabs.s16 q13, q1 + + ;right shift 15 to get sign, all 0 if it is positive, all 1 if it is negative + vshr.s16 q2, q0, #15 ; sz + vmov r2, r3, d28 ; check if all zero (step 3) + vshr.s16 q3, q1, #15 + + vld1.s16 {q14, q15}, [r5@128]; load round_ptr [0-15] + vld1.s16 {q8, q9}, [r4@128] ; load quant_ptr [0-15] + + vadd.s16 q12, q14 ; x + Round + vadd.s16 q13, q15 + + ldr r0, _inv_zig_zag_ ; load ptr of inverse zigzag table + + vqdmulh.s16 q12, q8 ; y = ((Round+abs(z)) * Quant) >> 16 + vqdmulh.s16 q13, q9 + + vld1.16 {q10, q11}, [r0@128]; load inverse scan order + + vceq.s16 q8, q8 ; set q8 to all 1 + + ldr r4, [r1, #vp8_blockd_dequant] + + vshr.s16 q12, #1 ; right shift 1 after vqdmulh + vshr.s16 q13, #1 + + orr r2, r2, r3 ; check if all zero (step 4) + cmp r2, #0 ; check if all zero (step 5) + beq zero_output ; check if all zero (step 6) + + ;modify data to have its original sign + veor.s16 q12, q2 ; y^sz + veor.s16 q13, q3 + + vsub.s16 q12, q2 ; x1=(y^sz)-sz = (y^sz)-(-1) (2's complement) + vsub.s16 q13, q3 + + vld1.s16 {q2, q3}, [r4@128] ; load dequant_ptr[i] + + vtst.16 q14, q12, q8 ; now find eob + vtst.16 q15, q13, q8 ; non-zero element is set to all 1 + + vst1.s16 {q12, q13}, [r6@128]; store: qcoeff = x1 + + vand q10, q10, q14 ; get all valid numbers from scan array + vand q11, q11, q15 + + + vmax.u16 q0, q10, q11 ; find maximum value in q0, q1 + vmax.u16 d0, d0, d1 + vmovl.u16 q0, d0 + + vmul.s16 q2, q12 ; x * Dequant + vmul.s16 q3, q13 + + vmax.u32 d0, d0, d1 + vpmax.u32 d0, d0, d0 + + vst1.s16 {q2, q3}, [r7@128] ; store dqcoeff = x * Dequant + + add r4, r1, #vp8_blockd_eob + vst1.32 {d0[0]}, [r4@32] + + ldmfd sp!, {r4-r7} + bx lr + +zero_output + str r2, [r1, #vp8_blockd_eob] + vst1.s16 {q0, q1}, [r6@128] ; qcoeff = 0 + vst1.s16 {q0, q1}, [r7@128] ; dqcoeff = 0 + + ldmfd sp!, {r4-r7} + bx lr + + ENDP + +; default inverse zigzag table is defined in vp9/common/entropy.c +_inv_zig_zag_ + DCD inv_zig_zag + + ALIGN 16 ; enable use of @128 bit aligned loads +inv_zig_zag + DCW 0x0001, 0x0002, 0x0006, 0x0007 + DCW 0x0003, 0x0005, 0x0008, 0x000d + DCW 0x0004, 0x0009, 0x000c, 0x000e + DCW 0x000a, 0x000b, 0x000f, 0x0010 + + END + diff --cc vp9/encoder/arm/neon/subtract_neon.asm index 68c295062,000000000..eab14868e mode 100644,000000..100644 --- a/vp9/encoder/arm/neon/subtract_neon.asm +++ b/vp9/encoder/arm/neon/subtract_neon.asm @@@ -1,185 -1,0 +1,185 @@@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + EXPORT |vp8_subtract_b_neon| + EXPORT |vp8_subtract_mby_neon| + EXPORT |vp8_subtract_mbuv_neon| + - INCLUDE asm_enc_offsets.asm ++ INCLUDE vp9_asm_enc_offsets.asm + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch) +|vp8_subtract_b_neon| PROC + + stmfd sp!, {r4-r7} + + ldr r3, [r0, #vp8_block_base_src] + ldr r4, [r0, #vp8_block_src] + ldr r5, [r0, #vp8_block_src_diff] + ldr r3, [r3] + ldr r6, [r0, #vp8_block_src_stride] + add r3, r3, r4 ; src = *base_src + src + ldr r7, [r1, #vp8_blockd_predictor] + + vld1.8 {d0}, [r3], r6 ;load src + vld1.8 {d1}, [r7], r2 ;load pred + vld1.8 {d2}, [r3], r6 + vld1.8 {d3}, [r7], r2 + vld1.8 {d4}, [r3], r6 + vld1.8 {d5}, [r7], r2 + vld1.8 {d6}, [r3], r6 + vld1.8 {d7}, [r7], r2 + + vsubl.u8 q10, d0, d1 + vsubl.u8 q11, d2, d3 + vsubl.u8 q12, d4, d5 + vsubl.u8 q13, d6, d7 + + mov r2, r2, lsl #1 + + vst1.16 {d20}, [r5], r2 ;store diff + vst1.16 {d22}, [r5], r2 + vst1.16 {d24}, [r5], r2 + vst1.16 {d26}, [r5], r2 + + ldmfd sp!, {r4-r7} + bx lr + + ENDP + + +;========================================== +;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride) +|vp8_subtract_mby_neon| PROC + mov r12, #4 + +subtract_mby_loop + vld1.8 {q0}, [r1], r3 ;load src + vld1.8 {q1}, [r2]! ;load pred + vld1.8 {q2}, [r1], r3 + vld1.8 {q3}, [r2]! + vld1.8 {q4}, [r1], r3 + vld1.8 {q5}, [r2]! + vld1.8 {q6}, [r1], r3 + vld1.8 {q7}, [r2]! + + vsubl.u8 q8, d0, d2 + vsubl.u8 q9, d1, d3 + vsubl.u8 q10, d4, d6 + vsubl.u8 q11, d5, d7 + vsubl.u8 q12, d8, d10 + vsubl.u8 q13, d9, d11 + vsubl.u8 q14, d12, d14 + vsubl.u8 q15, d13, d15 + + vst1.16 {q8}, [r0]! ;store diff + vst1.16 {q9}, [r0]! + vst1.16 {q10}, [r0]! + vst1.16 {q11}, [r0]! + vst1.16 {q12}, [r0]! + vst1.16 {q13}, [r0]! + vst1.16 {q14}, [r0]! + vst1.16 {q15}, [r0]! + + subs r12, r12, #1 + bne subtract_mby_loop + + bx lr + ENDP + +;================================= +;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) +|vp8_subtract_mbuv_neon| PROC + ldr r12, [sp] + +;u + add r0, r0, #512 ; short *udiff = diff + 256; + add r3, r3, #256 ; unsigned char *upred = pred + 256; + + vld1.8 {d0}, [r1], r12 ;load src + vld1.8 {d1}, [r3]! ;load pred + vld1.8 {d2}, [r1], r12 + vld1.8 {d3}, [r3]! + vld1.8 {d4}, [r1], r12 + vld1.8 {d5}, [r3]! + vld1.8 {d6}, [r1], r12 + vld1.8 {d7}, [r3]! + vld1.8 {d8}, [r1], r12 + vld1.8 {d9}, [r3]! + vld1.8 {d10}, [r1], r12 + vld1.8 {d11}, [r3]! + vld1.8 {d12}, [r1], r12 + vld1.8 {d13}, [r3]! + vld1.8 {d14}, [r1], r12 + vld1.8 {d15}, [r3]! + + vsubl.u8 q8, d0, d1 + vsubl.u8 q9, d2, d3 + vsubl.u8 q10, d4, d5 + vsubl.u8 q11, d6, d7 + vsubl.u8 q12, d8, d9 + vsubl.u8 q13, d10, d11 + vsubl.u8 q14, d12, d13 + vsubl.u8 q15, d14, d15 + + vst1.16 {q8}, [r0]! ;store diff + vst1.16 {q9}, [r0]! + vst1.16 {q10}, [r0]! + vst1.16 {q11}, [r0]! + vst1.16 {q12}, [r0]! + vst1.16 {q13}, [r0]! + vst1.16 {q14}, [r0]! + vst1.16 {q15}, [r0]! + +;v + vld1.8 {d0}, [r2], r12 ;load src + vld1.8 {d1}, [r3]! ;load pred + vld1.8 {d2}, [r2], r12 + vld1.8 {d3}, [r3]! + vld1.8 {d4}, [r2], r12 + vld1.8 {d5}, [r3]! + vld1.8 {d6}, [r2], r12 + vld1.8 {d7}, [r3]! + vld1.8 {d8}, [r2], r12 + vld1.8 {d9}, [r3]! + vld1.8 {d10}, [r2], r12 + vld1.8 {d11}, [r3]! + vld1.8 {d12}, [r2], r12 + vld1.8 {d13}, [r3]! + vld1.8 {d14}, [r2], r12 + vld1.8 {d15}, [r3]! 
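+ ; As for the U block above, each vsubl.u8/vst1.16 pair below
+ ; amounts to, per row of eight pixels (a hedged C sketch):
+ ;   for (j = 0; j < 8; j++)
+ ;     diff[j] = (short)vsrc[j] - (short)pred[j];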
+ + vsubl.u8 q8, d0, d1 + vsubl.u8 q9, d2, d3 + vsubl.u8 q10, d4, d5 + vsubl.u8 q11, d6, d7 + vsubl.u8 q12, d8, d9 + vsubl.u8 q13, d10, d11 + vsubl.u8 q14, d12, d13 + vsubl.u8 q15, d14, d15 + + vst1.16 {q8}, [r0]! ;store diff + vst1.16 {q9}, [r0]! + vst1.16 {q10}, [r0]! + vst1.16 {q11}, [r0]! + vst1.16 {q12}, [r0]! + vst1.16 {q13}, [r0]! + vst1.16 {q14}, [r0]! + vst1.16 {q15}, [r0]! + + bx lr + ENDP + + END diff --cc vp9/encoder/x86/quantize_sse2.asm index 9b563c514,000000000..af6aa6b3b mode 100644,000000..100644 --- a/vp9/encoder/x86/quantize_sse2.asm +++ b/vp9/encoder/x86/quantize_sse2.asm @@@ -1,380 -1,0 +1,380 @@@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" - %include "asm_enc_offsets.asm" ++%include "vp9_asm_enc_offsets.asm" + + +; void vp9_regular_quantize_b_sse2 | arg +; (BLOCK *b, | 0 +; BLOCKD *d) | 1 + +global sym(vp9_regular_quantize_b_sse2) +sym(vp9_regular_quantize_b_sse2): + push rbp + mov rbp, rsp + SAVE_XMM 7 + GET_GOT rbx + +%if ABI_IS_32BIT + push rdi + push rsi +%else + %ifidn __OUTPUT_FORMAT__,x64 + push rdi + push rsi + %endif +%endif + + ALIGN_STACK 16, rax + %define zrun_zbin_boost 0 ; 8 + %define abs_minus_zbin 8 ; 32 + %define temp_qcoeff 40 ; 32 + %define qcoeff 72 ; 32 + %define stack_size 104 + sub rsp, stack_size + ; end prolog + +%if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d +%else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif +%endif + + mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr + mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr + movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value + + ; z + movdqa xmm0, [rdx] + movdqa xmm4, [rdx + 16] + mov rdx, [rdi + vp9_block_round] ; round_ptr + + pshuflw xmm7, xmm7, 0 + punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + ; sz + psraw xmm0, 15 + psraw xmm4, 15 + + ; (z ^ sz) + pxor xmm1, xmm0 + pxor xmm5, xmm4 + + ; x = abs(z) + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa xmm2, [rcx] + movdqa xmm3, [rcx + 16] + mov rcx, [rdi + vp9_block_quant] ; quant_ptr + + ; *zbin_ptr + zbin_oq_value + paddw xmm2, xmm7 + paddw xmm3, xmm7 + + ; x - (*zbin_ptr + zbin_oq_value) + psubw xmm1, xmm2 + psubw xmm5, xmm3 + movdqa [rsp + abs_minus_zbin], xmm1 + movdqa [rsp + abs_minus_zbin + 16], xmm5 + + ; add (zbin_ptr + zbin_oq_value) back + paddw xmm1, xmm2 + paddw xmm5, xmm3 + + movdqa xmm2, [rdx] + movdqa xmm6, [rdx + 16] + + movdqa xmm3, [rcx] + movdqa xmm7, [rcx + 16] + + ; x + round + paddw xmm1, xmm2 + paddw xmm5, xmm6 + + ; y = x * quant_ptr >> 16 + pmulhw xmm3, xmm1 + pmulhw xmm7, xmm5 + + ; y += x + paddw xmm1, xmm3 + paddw xmm5, xmm7 + + movdqa [rsp + temp_qcoeff], xmm1 + movdqa [rsp + temp_qcoeff + 16], xmm5 + + pxor xmm6, xmm6 + ; zero qcoeff + movdqa [rsp + qcoeff], xmm6 + movdqa [rsp + qcoeff + 16], xmm6 + + mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr + mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr + mov [rsp + zrun_zbin_boost], rdx + +%macro ZIGZAG_LOOP 1 + ; x + movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2] + + ; if (x >= zbin) + sub cx, WORD PTR[rdx] ; x - zbin + lea rdx, [rdx + 
2] ; zbin_boost_ptr++ + jl .rq_zigzag_loop_%1 ; x < zbin + + movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2] + + ; downshift by quant_shift[rc] + movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc] + sar edi, cl ; also sets Z bit + je .rq_zigzag_loop_%1 ; !y + mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] + mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost +.rq_zigzag_loop_%1: +%endmacro +; in vp9_default_zig_zag1d order: see vp9/common/entropy.c +ZIGZAG_LOOP 0 +ZIGZAG_LOOP 1 +ZIGZAG_LOOP 4 +ZIGZAG_LOOP 8 +ZIGZAG_LOOP 5 +ZIGZAG_LOOP 2 +ZIGZAG_LOOP 3 +ZIGZAG_LOOP 6 +ZIGZAG_LOOP 9 +ZIGZAG_LOOP 12 +ZIGZAG_LOOP 13 +ZIGZAG_LOOP 10 +ZIGZAG_LOOP 7 +ZIGZAG_LOOP 11 +ZIGZAG_LOOP 14 +ZIGZAG_LOOP 15 + + movdqa xmm2, [rsp + qcoeff] + movdqa xmm3, [rsp + qcoeff + 16] + + mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr + mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr + + ; y ^ sz + pxor xmm2, xmm0 + pxor xmm3, xmm4 + ; x = (y ^ sz) - sz + psubw xmm2, xmm0 + psubw xmm3, xmm4 + + ; dequant + movdqa xmm0, [rcx] + movdqa xmm1, [rcx + 16] + + mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr + + pmullw xmm0, xmm2 + pmullw xmm1, xmm3 + + movdqa [rcx], xmm2 ; store qcoeff + movdqa [rcx + 16], xmm3 + movdqa [rdi], xmm0 ; store dqcoeff + movdqa [rdi + 16], xmm1 + + ; select the last value (in zig_zag order) for EOB + pcmpeqw xmm2, xmm6 + pcmpeqw xmm3, xmm6 + ; ! + pcmpeqw xmm6, xmm6 + pxor xmm2, xmm6 + pxor xmm3, xmm6 + ; mask inv_zig_zag + pand xmm2, [GLOBAL(inv_zig_zag)] + pand xmm3, [GLOBAL(inv_zig_zag + 16)] + ; select the max value + pmaxsw xmm2, xmm3 + pshufd xmm3, xmm2, 00001110b + pmaxsw xmm2, xmm3 + pshuflw xmm3, xmm2, 00001110b + pmaxsw xmm2, xmm3 + pshuflw xmm3, xmm2, 00000001b + pmaxsw xmm2, xmm3 + movd eax, xmm2 + and eax, 0xff + mov [rsi + vp9_blockd_eob], eax + + ; begin epilog + add rsp, stack_size + pop rsp +%if ABI_IS_32BIT + pop rsi + pop rdi +%else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + %endif +%endif + RESTORE_GOT + RESTORE_XMM + pop rbp + ret + +; void vp9_fast_quantize_b_sse2 | arg +; (BLOCK *b, | 0 +; BLOCKD *d) | 1 + +global sym(vp9_fast_quantize_b_sse2) +sym(vp9_fast_quantize_b_sse2): + push rbp + mov rbp, rsp + GET_GOT rbx + +%if ABI_IS_32BIT + push rdi + push rsi +%else + %ifidn __OUTPUT_FORMAT__,x64 + push rdi + push rsi + %else + ; these registers are used for passing arguments + %endif +%endif + + ; end prolog + +%if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d +%else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif +%endif + + mov rax, [rdi + vp9_block_coeff] + mov rcx, [rdi + vp9_block_round] + mov rdx, [rdi + vp9_block_quant_fast] + + ; z = coeff + movdqa xmm0, [rax] + movdqa xmm4, [rax + 16] + + ; dup z so we can save sz + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + ; sz = z >> 15 + psraw xmm0, 15 + psraw xmm4, 15 + + ; x = abs(z) = (z ^ sz) - sz + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + ; x += round + paddw xmm1, [rcx] + paddw xmm5, [rcx + 16] + + mov rax, [rsi + vp9_blockd_qcoeff] + mov rcx, [rsi + vp9_blockd_dequant] + mov rdi, [rsi + vp9_blockd_dqcoeff] + + ; y = x * quant >> 16 + pmulhw xmm1, [rdx] + pmulhw xmm5, [rdx + 16] + + ; x = (y ^ sz) - sz + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + ; qcoeff = x + movdqa [rax], xmm1 + movdqa [rax + 16], xmm5 + + ; x * dequant + movdqa xmm2, xmm1 + movdqa xmm3, xmm5 + pmullw 
xmm2, [rcx] + pmullw xmm3, [rcx + 16] + + ; dqcoeff = x * dequant + movdqa [rdi], xmm2 + movdqa [rdi + 16], xmm3 + + pxor xmm4, xmm4 ;clear all bits + pcmpeqw xmm1, xmm4 + pcmpeqw xmm5, xmm4 + + pcmpeqw xmm4, xmm4 ;set all bits + pxor xmm1, xmm4 + pxor xmm5, xmm4 + + pand xmm1, [GLOBAL(inv_zig_zag)] + pand xmm5, [GLOBAL(inv_zig_zag + 16)] + + pmaxsw xmm1, xmm5 + + ; now down to 8 + pshufd xmm5, xmm1, 00001110b + + pmaxsw xmm1, xmm5 + + ; only 4 left + pshuflw xmm5, xmm1, 00001110b + + pmaxsw xmm1, xmm5 + + ; okay, just 2! + pshuflw xmm5, xmm1, 00000001b + + pmaxsw xmm1, xmm5 + + movd eax, xmm1 + and eax, 0xff + mov [rsi + vp9_blockd_eob], eax + + ; begin epilog +%if ABI_IS_32BIT + pop rsi + pop rdi +%else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + %endif +%endif + + RESTORE_GOT + pop rbp + ret + +SECTION_RODATA +align 16 +inv_zig_zag: + dw 0x0001, 0x0002, 0x0006, 0x0007 + dw 0x0003, 0x0005, 0x0008, 0x000d + dw 0x0004, 0x0009, 0x000c, 0x000e + dw 0x000a, 0x000b, 0x000f, 0x0010 diff --cc vp9/encoder/x86/quantize_sse4.asm index c7429ecd3,000000000..4245b947c mode 100644,000000..100644 --- a/vp9/encoder/x86/quantize_sse4.asm +++ b/vp9/encoder/x86/quantize_sse4.asm @@@ -1,254 -1,0 +1,254 @@@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" - %include "asm_enc_offsets.asm" ++%include "vp9_asm_enc_offsets.asm" + + +; void vp9_regular_quantize_b_sse4 | arg +; (BLOCK *b, | 0 +; BLOCKD *d) | 1 + +global sym(vp9_regular_quantize_b_sse4) +sym(vp9_regular_quantize_b_sse4): + +%if ABI_IS_32BIT + push rbp + mov rbp, rsp + GET_GOT rbx + push rdi + push rsi + + ALIGN_STACK 16, rax + %define qcoeff 0 ; 32 + %define stack_size 32 + sub rsp, stack_size +%else + %ifidn __OUTPUT_FORMAT__,x64 + SAVE_XMM 8, u + push rdi + push rsi + %endif +%endif + ; end prolog + +%if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d +%else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif +%endif + + mov rax, [rdi + vp9_block_coeff] + mov rcx, [rdi + vp9_block_zbin] + mov rdx, [rdi + vp9_block_round] + movd xmm7, [rdi + vp9_block_zbin_extra] + + ; z + movdqa xmm0, [rax] + movdqa xmm1, [rax + 16] + + ; duplicate zbin_oq_value + pshuflw xmm7, xmm7, 0 + punpcklwd xmm7, xmm7 + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + + ; sz + psraw xmm0, 15 + psraw xmm1, 15 + + ; (z ^ sz) + pxor xmm2, xmm0 + pxor xmm3, xmm1 + + ; x = abs(z) + psubw xmm2, xmm0 + psubw xmm3, xmm1 + + ; zbin + movdqa xmm4, [rcx] + movdqa xmm5, [rcx + 16] + + ; *zbin_ptr + zbin_oq_value + paddw xmm4, xmm7 + paddw xmm5, xmm7 + + movdqa xmm6, xmm2 + movdqa xmm7, xmm3 + + ; x - (*zbin_ptr + zbin_oq_value) + psubw xmm6, xmm4 + psubw xmm7, xmm5 + + ; round + movdqa xmm4, [rdx] + movdqa xmm5, [rdx + 16] + + mov rax, [rdi + vp9_block_quant_shift] + mov rcx, [rdi + vp9_block_quant] + mov rdx, [rdi + vp9_block_zrun_zbin_boost] + + ; x + round + paddw xmm2, xmm4 + paddw xmm3, xmm5 + + ; quant + movdqa xmm4, [rcx] + movdqa xmm5, [rcx + 16] + + ; y = x * quant_ptr >> 16 + pmulhw xmm4, xmm2 + pmulhw xmm5, xmm3 + + ; y += x + paddw xmm2, xmm4 + paddw xmm3, xmm5 + + pxor xmm4, xmm4 +%if ABI_IS_32BIT + movdqa [rsp + 
qcoeff], xmm4 + movdqa [rsp + qcoeff + 16], xmm4 +%else + pxor xmm8, xmm8 +%endif + + ; quant_shift + movdqa xmm5, [rax] + + ; zrun_zbin_boost + mov rax, rdx + +%macro ZIGZAG_LOOP 5 + ; x + pextrw ecx, %4, %2 + + ; if (x >= zbin) + sub cx, WORD PTR[rdx] ; x - zbin + lea rdx, [rdx + 2] ; zbin_boost_ptr++ + jl .rq_zigzag_loop_%1 ; x < zbin + + pextrw edi, %3, %2 ; y + + ; downshift by quant_shift[rc] + pextrb ecx, xmm5, %1 ; quant_shift[rc] + sar edi, cl ; also sets Z bit + je .rq_zigzag_loop_%1 ; !y +%if ABI_IS_32BIT + mov WORD PTR[rsp + qcoeff + %1 *2], di +%else + pinsrw %5, edi, %2 ; qcoeff[rc] +%endif + mov rdx, rax ; reset to b->zrun_zbin_boost +.rq_zigzag_loop_%1: +%endmacro +; in vp9_default_zig_zag1d order: see vp9/common/entropy.c +ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4 +ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8 +ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 + + mov rcx, [rsi + vp9_blockd_dequant] + mov rdi, [rsi + vp9_blockd_dqcoeff] + +%if ABI_IS_32BIT + movdqa xmm4, [rsp + qcoeff] + movdqa xmm5, [rsp + qcoeff + 16] +%else + %define xmm5 xmm8 +%endif + + ; y ^ sz + pxor xmm4, xmm0 + pxor xmm5, xmm1 + ; x = (y ^ sz) - sz + psubw xmm4, xmm0 + psubw xmm5, xmm1 + + ; dequant + movdqa xmm0, [rcx] + movdqa xmm1, [rcx + 16] + + mov rcx, [rsi + vp9_blockd_qcoeff] + + pmullw xmm0, xmm4 + pmullw xmm1, xmm5 + + ; store qcoeff + movdqa [rcx], xmm4 + movdqa [rcx + 16], xmm5 + + ; store dqcoeff + movdqa [rdi], xmm0 + movdqa [rdi + 16], xmm1 + + ; select the last value (in zig_zag order) for EOB + pxor xmm6, xmm6 + pcmpeqw xmm4, xmm6 + pcmpeqw xmm5, xmm6 + + packsswb xmm4, xmm5 + pshufb xmm4, [GLOBAL(zig_zag1d)] + pmovmskb edx, xmm4 + xor rdi, rdi + mov eax, -1 + xor dx, ax + bsr eax, edx + sub edi, edx + sar edi, 31 + add eax, 1 + and eax, edi + + mov [rsi + vp9_blockd_eob], eax + + ; begin epilog +%if ABI_IS_32BIT + add rsp, stack_size + pop rsp + + pop rsi + pop rdi + RESTORE_GOT + pop rbp +%else + %undef xmm5 + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + RESTORE_XMM + %endif +%endif + + ret + +SECTION_RODATA +align 16 +; vp9/common/entropy.c: vp9_default_zig_zag1d +zig_zag1d: + db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --cc vp9/encoder/x86/quantize_ssse3.asm index 14a9912d2,000000000..8c464287a mode 100644,000000..100644 --- a/vp9/encoder/x86/quantize_ssse3.asm +++ b/vp9/encoder/x86/quantize_ssse3.asm @@@ -1,138 -1,0 +1,138 @@@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" - %include "asm_enc_offsets.asm" ++%include "vp9_asm_enc_offsets.asm" + + +; void vp9_fast_quantize_b_ssse3 | arg +; (BLOCK *b, | 0 +; BLOCKD *d) | 1 +; + +global sym(vp9_fast_quantize_b_ssse3) +sym(vp9_fast_quantize_b_ssse3): + push rbp + mov rbp, rsp + GET_GOT rbx + +%if ABI_IS_32BIT + push rdi + push rsi +%else + %ifidn __OUTPUT_FORMAT__,x64 + push rdi + push rsi + %endif +%endif + ; end prolog + +%if ABI_IS_32BIT + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d +%else + %ifidn __OUTPUT_FORMAT__,x64 + mov rdi, rcx ; BLOCK *b + mov rsi, rdx ; BLOCKD *d + %else + ;mov rdi, rdi ; BLOCK *b + ;mov rsi, rsi ; BLOCKD *d + %endif +%endif + + mov rax, [rdi + vp9_block_coeff] + mov rcx, [rdi + vp9_block_round] + mov rdx, [rdi + vp9_block_quant_fast] + + ; coeff + movdqa xmm0, [rax] + movdqa xmm4, [rax + 16] + + ; round + movdqa xmm2, [rcx] + movdqa xmm3, [rcx + 16] + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + ; sz = z >> 15 + psraw xmm0, 15 + psraw xmm4, 15 + + pabsw xmm1, xmm1 + pabsw xmm5, xmm5 + + paddw xmm1, xmm2 + paddw xmm5, xmm3 + + ; quant_fast + pmulhw xmm1, [rdx] + pmulhw xmm5, [rdx + 16] + + mov rax, [rsi + vp9_blockd_qcoeff] + mov rdi, [rsi + vp9_blockd_dequant] + mov rcx, [rsi + vp9_blockd_dqcoeff] + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa [rax], xmm1 + movdqa [rax + 16], xmm5 + + movdqa xmm2, [rdi] + movdqa xmm3, [rdi + 16] + + pxor xmm4, xmm4 + pmullw xmm2, xmm1 + pmullw xmm3, xmm5 + + pcmpeqw xmm1, xmm4 ;non zero mask + pcmpeqw xmm5, xmm4 ;non zero mask + packsswb xmm1, xmm5 + pshufb xmm1, [GLOBAL(zz_shuf)] + + pmovmskb edx, xmm1 + + xor rdi, rdi + mov eax, -1 + xor dx, ax ;flip the bits for bsr + bsr eax, edx + + movdqa [rcx], xmm2 ;store dqcoeff + movdqa [rcx + 16], xmm3 ;store dqcoeff + + sub edi, edx ;check for all zeros in bit mask + sar edi, 31 ;0 or -1 + add eax, 1 + and eax, edi ;if the bit mask was all zero, + ;then eob = 0 + mov [rsi + vp9_blockd_eob], eax + + ; begin epilog +%if ABI_IS_32BIT + pop rsi + pop rdi +%else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + pop rdi + %endif +%endif + + RESTORE_GOT + pop rbp + ret + +SECTION_RODATA +align 16 +zz_shuf: + db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --cc vp9/vp9_common.mk index c5237d846,000000000..82e1e1053 mode 100644,000000..100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@@ -1,179 -1,0 +1,183 @@@ +## +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
+## + +VP9_COMMON_SRCS-yes += vp9_common.mk +VP9_COMMON_SRCS-yes += common/type_aliases.h +VP9_COMMON_SRCS-yes += common/pragmas.h +VP9_COMMON_SRCS-yes += common/ppflags.h +VP9_COMMON_SRCS-yes += common/onyx.h +VP9_COMMON_SRCS-yes += common/onyxd.h +VP9_COMMON_SRCS-yes += common/alloccommon.c +VP9_COMMON_SRCS-yes += common/asm_com_offsets.c +VP9_COMMON_SRCS-yes += common/blockd.c +VP9_COMMON_SRCS-yes += common/coefupdateprobs.h +VP9_COMMON_SRCS-yes += common/debugmodes.c +VP9_COMMON_SRCS-yes += common/entropy.c +VP9_COMMON_SRCS-yes += common/entropymode.c +VP9_COMMON_SRCS-yes += common/entropymv.c +VP9_COMMON_SRCS-yes += common/extend.c +VP9_COMMON_SRCS-yes += common/filter.c +VP9_COMMON_SRCS-yes += common/filter.h +VP9_COMMON_SRCS-yes += common/findnearmv.c +VP9_COMMON_SRCS-yes += common/generic/systemdependent.c +VP9_COMMON_SRCS-yes += common/idctllm.c +VP9_COMMON_SRCS-yes += common/alloccommon.h +VP9_COMMON_SRCS-yes += common/blockd.h +VP9_COMMON_SRCS-yes += common/common.h +VP9_COMMON_SRCS-yes += common/common_types.h +VP9_COMMON_SRCS-yes += common/entropy.h +VP9_COMMON_SRCS-yes += common/entropymode.h +VP9_COMMON_SRCS-yes += common/entropymv.h +VP9_COMMON_SRCS-yes += common/extend.h +VP9_COMMON_SRCS-yes += common/findnearmv.h +VP9_COMMON_SRCS-yes += common/header.h +VP9_COMMON_SRCS-yes += common/idct.h +VP9_COMMON_SRCS-yes += common/invtrans.h +VP9_COMMON_SRCS-yes += common/loopfilter.h +VP9_COMMON_SRCS-yes += common/modecont.h +VP9_COMMON_SRCS-yes += common/mv.h +VP9_COMMON_SRCS-yes += common/onyxc_int.h +VP9_COMMON_SRCS-yes += common/pred_common.h +VP9_COMMON_SRCS-yes += common/pred_common.c +VP9_COMMON_SRCS-yes += common/quant_common.h +VP9_COMMON_SRCS-yes += common/reconinter.h +VP9_COMMON_SRCS-yes += common/reconintra.h +VP9_COMMON_SRCS-yes += common/reconintra4x4.h +VP9_COMMON_SRCS-yes += common/rtcd.c +VP9_COMMON_SRCS-yes += common/rtcd_defs.sh +VP9_COMMON_SRCS-yes += common/sadmxn.h +VP9_COMMON_SRCS-yes += common/seg_common.h +VP9_COMMON_SRCS-yes += common/seg_common.c +VP9_COMMON_SRCS-yes += common/setupintrarecon.h +VP9_COMMON_SRCS-yes += common/subpixel.h +VP9_COMMON_SRCS-yes += common/swapyv12buffer.h +VP9_COMMON_SRCS-yes += common/systemdependent.h +VP9_COMMON_SRCS-yes += common/treecoder.h +VP9_COMMON_SRCS-yes += common/invtrans.c +VP9_COMMON_SRCS-yes += common/loopfilter.c +VP9_COMMON_SRCS-yes += common/loopfilter_filters.c +VP9_COMMON_SRCS-yes += common/mbpitch.c +VP9_COMMON_SRCS-yes += common/modecont.c +VP9_COMMON_SRCS-yes += common/modecontext.c +VP9_COMMON_SRCS-yes += common/mvref_common.c +VP9_COMMON_SRCS-yes += common/mvref_common.h +VP9_COMMON_SRCS-yes += common/quant_common.c +VP9_COMMON_SRCS-yes += common/recon.c +VP9_COMMON_SRCS-yes += common/reconinter.c +VP9_COMMON_SRCS-yes += common/reconintra.c +VP9_COMMON_SRCS-yes += common/reconintra4x4.c +VP9_COMMON_SRCS-yes += common/setupintrarecon.c +VP9_COMMON_SRCS-yes += common/swapyv12buffer.c +VP9_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c +VP9_COMMON_SRCS-yes += common/treecoder.c +VP9_COMMON_SRCS-$(CONFIG_IMPLICIT_SEGMENTATION) += common/implicit_segmentation.c + +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/subpixel_x86.h +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.h +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/postproc_x86.h +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/x86_systemdependent.c +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += 
common/x86/vp8_asm_stubs.c +VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c +VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h +VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c +VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm +VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm +VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm +VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm +VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm +VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_8t_ssse3.asm +VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm +ifeq ($(CONFIG_POSTPROC),yes) +VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm +endif + +# common (c) +ifeq ($(CONFIG_CSM),yes) +VP9_COMMON_SRCS-yes += common/maskingmv.c +VP9_COMMON_SRCS-$(HAVE_SSE3) += common/x86/mask_sse3.asm +endif + +VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/filter_sse4.c +ifeq ($(HAVE_SSE4_1),yes) +vp9/common/x86/filter_sse4.c.o: CFLAGS += -msse4 +endif + +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/filter_sse2.c +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/sadmxn_x86.c +ifeq ($(HAVE_SSE2),yes) +vp9/common/x86/filter_sse2.c.o: CFLAGS += -msse2 +vp9/common/x86/loopfilter_x86.c.o: CFLAGS += -msse2 +vp9/common/x86/sadmxn_x86.c.o: CFLAGS += -msse2 +endif + +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.c +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/bilinearfilter_arm.h +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/filter_arm.c +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/idct_arm.h +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.c +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/loopfilter_arm.h +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/recon_arm.h +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/reconintra_arm.c +VP9_COMMON_SRCS-$(ARCH_ARM) += common/arm/subpixel_arm.h + +# common (armv6) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x4_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x8_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem16x16_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dc_only_idct_add_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/iwalsh_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/filter_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/idct_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/loopfilter_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/recon_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/simpleloopfilter_v6$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/sixtappredict8x4_v6$(ASM) + +# common (neon) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict4x4_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict8x4_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict8x8_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += 
common/arm/neon/bilinearpredict16x16_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x4_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x8_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem16x16_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dc_only_idct_add_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/iwalsh_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilter_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilter_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon2b_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon4b_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/reconb_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/shortidct4x4llm_1_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/shortidct4x4llm_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict4x4_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x4_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict8x8_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict16x16_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon16x16mb_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/save_neon_reg$(ASM) +VP9_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon_neon.c ++ ++ ++$(eval $(call asm_offsets_template,\ ++ vp9_asm_com_offsets.asm, $(VP9_PREFIX)common/asm_com_offsets.c)) diff --cc vp9/vp9cx.mk index ebb7a575d,000000000..6e157b0c4 mode 100644,000000..100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@@ -1,120 -1,0 +1,120 @@@ +## +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
+## + - - include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk - +VP9_CX_EXPORTS += exports_enc + +VP9_CX_SRCS-yes += $(VP9_COMMON_SRCS-yes) +VP9_CX_SRCS-no += $(VP9_COMMON_SRCS-no) +VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) +VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) + +ifeq ($(ARCH_ARM),yes) + include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx_arm.mk +endif + +VP9_CX_SRCS-yes += vp9_cx_iface.c + +# encoder +#INCLUDES += algo/vpx_common/vpx_mem/include +#INCLUDES += common +#INCLUDES += common +#INCLUDES += common +#INCLUDES += algo/vpx_ref/cpu_id/include +#INCLUDES += common +#INCLUDES += encoder + +VP9_CX_SRCS-yes += encoder/asm_enc_offsets.c +VP9_CX_SRCS-yes += encoder/bitstream.c +VP9_CX_SRCS-yes += encoder/boolhuff.c +VP9_CX_SRCS-yes += encoder/dct.c +VP9_CX_SRCS-yes += encoder/encodeframe.c +VP9_CX_SRCS-yes += encoder/encodeintra.c +VP9_CX_SRCS-yes += encoder/encodemb.c +VP9_CX_SRCS-yes += encoder/encodemv.c +VP9_CX_SRCS-yes += encoder/firstpass.c +VP9_CX_SRCS-yes += encoder/generic/csystemdependent.c +VP9_CX_SRCS-yes += encoder/block.h +VP9_CX_SRCS-yes += encoder/boolhuff.h +VP9_CX_SRCS-yes += encoder/bitstream.h +VP9_CX_SRCS-yes += encoder/encodeintra.h +VP9_CX_SRCS-yes += encoder/encodemb.h +VP9_CX_SRCS-yes += encoder/encodemv.h +VP9_CX_SRCS-yes += encoder/firstpass.h +VP9_CX_SRCS-yes += encoder/lookahead.c +VP9_CX_SRCS-yes += encoder/lookahead.h +VP9_CX_SRCS-yes += encoder/mcomp.h +VP9_CX_SRCS-yes += encoder/modecosts.h +VP9_CX_SRCS-yes += encoder/onyx_int.h +VP9_CX_SRCS-yes += encoder/psnr.h +VP9_CX_SRCS-yes += encoder/quantize.h +VP9_CX_SRCS-yes += encoder/ratectrl.h +VP9_CX_SRCS-yes += encoder/rdopt.h +VP9_CX_SRCS-yes += encoder/tokenize.h +VP9_CX_SRCS-yes += encoder/treewriter.h +VP9_CX_SRCS-yes += encoder/variance.h +VP9_CX_SRCS-yes += encoder/mcomp.c +VP9_CX_SRCS-yes += encoder/modecosts.c +VP9_CX_SRCS-yes += encoder/onyx_if.c +VP9_CX_SRCS-yes += encoder/picklpf.c +VP9_CX_SRCS-yes += encoder/psnr.c +VP9_CX_SRCS-yes += encoder/quantize.c +VP9_CX_SRCS-yes += encoder/ratectrl.c +VP9_CX_SRCS-yes += encoder/rdopt.c +VP9_CX_SRCS-yes += encoder/sad_c.c +VP9_CX_SRCS-yes += encoder/satd_c.c +VP9_CX_SRCS-yes += encoder/segmentation.c +VP9_CX_SRCS-yes += encoder/segmentation.h +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/ssim.c +VP9_CX_SRCS-yes += encoder/tokenize.c +VP9_CX_SRCS-yes += encoder/treewriter.c +VP9_CX_SRCS-yes += encoder/variance_c.c +ifeq ($(CONFIG_POSTPROC),yes) +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c +endif +VP9_CX_SRCS-yes += encoder/temporal_filter.c +VP9_CX_SRCS-yes += encoder/temporal_filter.h +VP9_CX_SRCS-yes += encoder/mbgraph.c +VP9_CX_SRCS-yes += encoder/mbgraph.h + + +VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h +VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h +VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h +VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c +VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c +VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm +VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/sad_mmx.asm +VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm +VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_sse2.c +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm 
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm +VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm +VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm +VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm +VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_ssse3.c +VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_impl_ssse3.asm +VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm +VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm +VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm +VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm +VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm +VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm + + +VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) ++ ++$(eval $(call asm_offsets_template,\ ++ vp9_asm_enc_offsets.asm, $(VP9_PREFIX)encoder/asm_enc_offsets.c)) diff --cc vp9/vp9dx.mk index dc9f2d390,000000000..75f6bfd6e mode 100644,000000..100644 --- a/vp9/vp9dx.mk +++ b/vp9/vp9dx.mk @@@ -1,71 -1,0 +1,71 @@@ +## +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## + - - include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk - +VP9_DX_EXPORTS += exports_dec + +VP9_DX_SRCS-yes += $(VP9_COMMON_SRCS-yes) +VP9_DX_SRCS-no += $(VP9_COMMON_SRCS-no) +VP9_DX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) +VP9_DX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) + +ifeq ($(ARCH_ARM),yes) + include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx_arm.mk +endif + +VP9_DX_SRCS-yes += vp9_dx_iface.c + +# common +#define ARM +#define DISABLE_THREAD + +#INCLUDES += algo/vpx_common/vpx_mem/include +#INCLUDES += common +#INCLUDES += common +#INCLUDES += common +#INCLUDES += common +#INCLUDES += decoder + + + +# decoder +#define ARM +#define DISABLE_THREAD + +#INCLUDES += algo/vpx_common/vpx_mem/include +#INCLUDES += common +#INCLUDES += common +#INCLUDES += common +#INCLUDES += common +#INCLUDES += decoder + +VP9_DX_SRCS-yes += decoder/asm_dec_offsets.c +VP9_DX_SRCS-yes += decoder/dboolhuff.c +VP9_DX_SRCS-yes += decoder/decodemv.c +VP9_DX_SRCS-yes += decoder/decodframe.c +VP9_DX_SRCS-yes += decoder/dequantize.c +VP9_DX_SRCS-yes += decoder/detokenize.c +VP9_DX_SRCS-yes += decoder/dboolhuff.h +VP9_DX_SRCS-yes += decoder/decodemv.h +VP9_DX_SRCS-yes += decoder/dequantize.h +VP9_DX_SRCS-yes += decoder/detokenize.h +VP9_DX_SRCS-yes += decoder/onyxd_int.h +VP9_DX_SRCS-yes += decoder/treereader.h +VP9_DX_SRCS-yes += decoder/onyxd_if.c +VP9_DX_SRCS-yes += decoder/idct_blk.c + +VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes)) + +VP9_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c +VP9_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm +VP9_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c +VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c ++ ++$(eval $(call asm_offsets_template,\ ++ vp9_asm_dec_offsets.asm, $(VP9_PREFIX)decoder/asm_dec_offsets.c)) diff --cc vpx_ports/arm_cpudetect.c index f36d46d51,8ff95a110..b23344858 --- a/vpx_ports/arm_cpudetect.c +++ 
b/vpx_ports/arm_cpudetect.c @@@ -35,24 -39,25 +35,26 @@@ int arm_cpu_caps(void) /* This function should actually be a no-op. There is no way to adjust any of * these because the RTCD tables do not exist: the functions are called * statically */ - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) - { - return flags; - } - mask = arm_cpu_env_mask(); + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); #if HAVE_EDSP - flags |= HAS_EDSP; + flags |= HAS_EDSP; #endif /* HAVE_EDSP */ #if HAVE_MEDIA - flags |= HAS_MEDIA; + flags |= HAS_MEDIA; #endif /* HAVE_MEDIA */ #if HAVE_NEON - flags |= HAS_NEON; + flags |= HAS_NEON; #endif /* HAVE_NEON */ - return flags & mask; + return flags & mask; } #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ #define WIN32_LEAN_AND_MEAN @@@ -109,29 -127,31 +111,31 @@@ return flags & mask #elif defined(__ANDROID__) /* end _MSC_VER */ #include <cpu-features.h> -int arm_cpu_caps(void) -{ - int flags; - int mask; - uint64_t features; - if (!arm_cpu_env_flags(&flags)) - { - return flags; - } - mask = arm_cpu_env_mask(); - features = android_getCpuFeatures(); +int arm_cpu_caps(void) { + int flags; + int mask; + uint64_t features; + if (!arm_cpu_env_flags(&flags)) { + return flags; + } + mask = arm_cpu_env_mask(); + features = android_getCpuFeatures(); #if HAVE_EDSP - flags |= HAS_EDSP; + flags |= HAS_EDSP; #endif /* HAVE_EDSP */ #if HAVE_MEDIA - flags |= HAS_MEDIA; + flags |= HAS_MEDIA; #endif /* HAVE_MEDIA */ #if HAVE_NEON - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - flags |= HAS_NEON; + if (features & ANDROID_CPU_ARM_FEATURE_NEON) + flags |= HAS_NEON; #endif /* HAVE_NEON */ - return flags & mask; + return flags & mask; } #elif defined(__linux__) /* end __ANDROID__ */ #include <stdio.h> diff --cc vpx_ports/x86.h index f88377290,9dd8c4b59..f1cf6265e --- a/vpx_ports/x86.h +++ b/vpx_ports/x86.h @@@ -185,25 -188,29 +185,38 @@@ x86_readtsc(void) #if defined(__GNUC__) && __GNUC__ static void -x87_set_control_word(unsigned short mode) -{ - __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); +x87_set_control_word(unsigned short mode) { + __asm__ __volatile__("fldcw %0" : : "m"( *&mode)); } static unsigned short -x87_get_control_word(void) -{ - unsigned short mode; - __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); +x87_get_control_word(void) { + unsigned short mode; + __asm__ __volatile__("fstcw %0\n\t":"=m"( *&mode):); + return mode; + } + #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) + static void + x87_set_control_word(unsigned short mode) + { + asm volatile("fldcw %0" : : "m"(*&mode)); + } + static unsigned short + x87_get_control_word(void) + { + unsigned short mode; + asm volatile("fstcw %0\n\t":"=m"(*&mode):); - return mode; + return mode; +} +#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +static void +x87_set_control_word(unsigned short mode) { + asm volatile("fldcw %0" : : "m"( *&mode)); +} +static unsigned short +x87_get_control_word(void) { + unsigned short mode; + asm volatile("fstcw %0\n\t":"=m"( *&mode):); + return mode; } #elif ARCH_X86_64 /* No fldcw intrinsics on Windows x64, punt to external asm */ diff --cc vpxdec.c index 4e598298f,9b728bf82..44a80e3df --- a/vpxdec.c +++ b/vpxdec.c @@@ -22,7 -22,7 +22,7 @@@ #include "vpx_config.h" #include "vpx/vpx_decoder.h" #include "vpx_ports/vpx_timer.h" - #if CONFIG_VP9_DECODER -#if 
CONFIG_VP8_DECODER ++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER #include "vpx/vp8dx.h" #endif #if CONFIG_MD5 @@@ -49,14 -49,16 +49,17 @@@ static const char *exec_name; #define VP8_FOURCC (0x00385056) -static const struct -{ - char const *name; - vpx_codec_iface_t *iface; - unsigned int fourcc; - unsigned int fourcc_mask; -} ifaces[] = -{ +static const struct { + char const *name; + const vpx_codec_iface_t *(*iface)(void); + unsigned int fourcc; + unsigned int fourcc_mask; +} ifaces[] = { + #if CONFIG_VP8_DECODER - {"vp8", &vpx_codec_vp8_dx_algo, VP8_FOURCC, 0x00FFFFFF}, ++ {"vp8", vpx_codec_vp8_dx, VP8_FOURCC, 0x00FFFFFF}, ++#endif +#if CONFIG_VP9_DECODER + {"vp9", vpx_codec_vp8_dx, VP8_FOURCC, 0x00FFFFFF}, #endif }; @@@ -95,121 -95,129 +98,121 @@@ static const arg_def_t error_concealmen static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0, "Compute the MD5 sum of the decoded frame"); #endif -static const arg_def_t *all_args[] = -{ - &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg, - &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile, - &threadsarg, &verbosearg, +static const arg_def_t *all_args[] = { + &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg, + &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile, + &threadsarg, &verbosearg, #if CONFIG_MD5 - &md5arg, + &md5arg, #endif - &error_concealment, - NULL + &error_concealment, + NULL }; - #if CONFIG_VP9_DECODER -#if CONFIG_VP8_DECODER ++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1, - "Enable VP8 postproc add noise"); + "Enable VP8 postproc add noise"); static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0, - "Enable VP8 deblocking"); + "Enable VP8 deblocking"); static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1, - "Enable VP8 demacroblocking, w/ level"); + "Enable VP8 demacroblocking, w/ level"); static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1, - "Enable VP8 visible debug info"); + "Enable VP8 visible debug info"); static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1, - "Display only selected reference frame per macro block"); + "Display only selected reference frame per macro block"); static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1, - "Display only selected macro block modes"); + "Display only selected macro block modes"); static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1, - "Display only selected block modes"); + "Display only selected block modes"); static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1, - "Draw only selected motion vectors"); + "Draw only selected motion vectors"); static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0, - "Enable multiframe quality enhancement"); + "Enable multiframe quality enhancement"); -static const arg_def_t *vp8_pp_args[] = -{ - &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info, - &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe, - NULL +static const arg_def_t *vp8_pp_args[] = { + &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info, + &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs, &mfqe, + NULL }; #endif -static void usage_exit() -{ - int i; +static void usage_exit() { + int i; - fprintf(stderr, "Usage: %s filename\n\n" - "Options:\n", exec_name); - arg_show_usage(stderr, all_args); -#if CONFIG_VP8_DECODER - fprintf(stderr, "\nVP8 Postprocessing 
Options:\n"); - arg_show_usage(stderr, vp8_pp_args); + fprintf(stderr, "Usage: %s filename\n\n" + "Options:\n", exec_name); + arg_show_usage(stderr, all_args); - #if CONFIG_VP9_DECODER ++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER + fprintf(stderr, "\nVP8 Postprocessing Options:\n"); + arg_show_usage(stderr, vp8_pp_args); #endif - fprintf(stderr, - "\nOutput File Patterns:\n\n" - " The -o argument specifies the name of the file(s) to " - "write to. If the\n argument does not include any escape " - "characters, the output will be\n written to a single file. " - "Otherwise, the filename will be calculated by\n expanding " - "the following escape characters:\n"); - fprintf(stderr, - "\n\t%%w - Frame width" - "\n\t%%h - Frame height" - "\n\t%% - Frame number, zero padded to places (1..9)" - "\n\n Pattern arguments are only supported in conjunction " - "with the --yv12 and\n --i420 options. If the -o option is " - "not specified, the output will be\n directed to stdout.\n" - ); - fprintf(stderr, "\nIncluded decoders:\n\n"); - - for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) - fprintf(stderr, " %-6s - %s\n", - ifaces[i].name, - vpx_codec_iface_name(ifaces[i].iface)); - - exit(EXIT_FAILURE); + fprintf(stderr, + "\nOutput File Patterns:\n\n" + " The -o argument specifies the name of the file(s) to " + "write to. If the\n argument does not include any escape " + "characters, the output will be\n written to a single file. " + "Otherwise, the filename will be calculated by\n expanding " + "the following escape characters:\n"); + fprintf(stderr, + "\n\t%%w - Frame width" + "\n\t%%h - Frame height" + "\n\t%% - Frame number, zero padded to places (1..9)" + "\n\n Pattern arguments are only supported in conjunction " + "with the --yv12 and\n --i420 options. If the -o option is " + "not specified, the output will be\n directed to stdout.\n" + ); + fprintf(stderr, "\nIncluded decoders:\n\n"); + + for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) + fprintf(stderr, " %-6s - %s\n", + ifaces[i].name, + vpx_codec_iface_name(ifaces[i].iface())); + + exit(EXIT_FAILURE); } -void die(const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - usage_exit(); +void die(const char *fmt, ...) 
{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + usage_exit(); } -static unsigned int mem_get_le16(const void *vmem) -{ - unsigned int val; - const unsigned char *mem = (const unsigned char *)vmem; +static unsigned int mem_get_le16(const void *vmem) { + unsigned int val; + const unsigned char *mem = (const unsigned char *)vmem; - val = mem[1] << 8; - val |= mem[0]; - return val; + val = mem[1] << 8; + val |= mem[0]; + return val; } -static unsigned int mem_get_le32(const void *vmem) -{ - unsigned int val; - const unsigned char *mem = (const unsigned char *)vmem; +static unsigned int mem_get_le32(const void *vmem) { + unsigned int val; + const unsigned char *mem = (const unsigned char *)vmem; - val = mem[3] << 24; - val |= mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; + val = mem[3] << 24; + val |= mem[2] << 16; + val |= mem[1] << 8; + val |= mem[0]; + return val; } -enum file_kind -{ - RAW_FILE, - IVF_FILE, - WEBM_FILE +enum file_kind { + RAW_FILE, + IVF_FILE, + WEBM_FILE }; -struct input_ctx -{ - enum file_kind kind; - FILE *infile; - nestegg *nestegg_ctx; - nestegg_packet *pkt; - unsigned int chunk; - unsigned int chunks; - unsigned int video_track; +struct input_ctx { + enum file_kind kind; + FILE *infile; + nestegg *nestegg_ctx; + nestegg_packet *pkt; + unsigned int chunk; + unsigned int chunks; + unsigned int video_track; }; #define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t)) @@@ -663,401 -697,447 +666,401 @@@ void generate_filename(const char *patt } -int main(int argc, const char **argv_) -{ - vpx_codec_ctx_t decoder; - char *fn = NULL; - int i; - uint8_t *buf = NULL; - size_t buf_sz = 0, buf_alloc_sz = 0; - FILE *infile; - int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0; - int stop_after = 0, postproc = 0, summary = 0, quiet = 1; - int ec_enabled = 0; - vpx_codec_iface_t *iface = NULL; - unsigned int fourcc; - unsigned long dx_time = 0; - struct arg arg; - char **argv, **argi, **argj; - const char *outfile_pattern = 0; - char outfile[PATH_MAX]; - int single_file; - int use_y4m = 1; - unsigned int width; - unsigned int height; - unsigned int fps_den; - unsigned int fps_num; - void *out = NULL; - vpx_codec_dec_cfg_t cfg = {0}; -#if CONFIG_VP8_DECODER - vp8_postproc_cfg_t vp8_pp_cfg = {0}; - int vp8_dbg_color_ref_frame = 0; - int vp8_dbg_color_mb_modes = 0; - int vp8_dbg_color_b_modes = 0; - int vp8_dbg_display_mv = 0; +int main(int argc, const char **argv_) { + vpx_codec_ctx_t decoder; + char *fn = NULL; + int i; + uint8_t *buf = NULL; + size_t buf_sz = 0, buf_alloc_sz = 0; + FILE *infile; + int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0; + int stop_after = 0, postproc = 0, summary = 0, quiet = 1; + int arg_skip = 0; + int ec_enabled = 0; + vpx_codec_iface_t *iface = NULL; + unsigned int fourcc; + unsigned long dx_time = 0; + struct arg arg; + char **argv, **argi, **argj; + const char *outfile_pattern = 0; + char outfile[PATH_MAX]; + int single_file; + int use_y4m = 1; + unsigned int width; + unsigned int height; + unsigned int fps_den; + unsigned int fps_num; + void *out = NULL; + vpx_codec_dec_cfg_t cfg = {0}; - #if CONFIG_VP9_DECODER ++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER + vp8_postproc_cfg_t vp8_pp_cfg = {0}; + int vp8_dbg_color_ref_frame = 0; + int vp8_dbg_color_mb_modes = 0; + int vp8_dbg_color_b_modes = 0; + int vp8_dbg_display_mv = 0; #endif - struct input_ctx input = {0}; - int frames_corrupted = 0; - int dec_flags = 0; - - 
/* Parse command line */ - exec_name = argv_[0]; - argv = argv_dup(argc - 1, argv_ + 1); - - for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) - { - memset(&arg, 0, sizeof(arg)); - arg.argv_step = 1; - - if (arg_match(&arg, &codecarg, argi)) - { - int j, k = -1; - - for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++) - if (!strcmp(ifaces[j].name, arg.val)) - k = j; - - if (k >= 0) - iface = ifaces[k].iface; - else - die("Error: Unrecognized argument (%s) to --codec\n", - arg.val); - } - else if (arg_match(&arg, &outputfile, argi)) - outfile_pattern = arg.val; - else if (arg_match(&arg, &use_yv12, argi)) - { - use_y4m = 0; - flipuv = 1; - } - else if (arg_match(&arg, &use_i420, argi)) - { - use_y4m = 0; - flipuv = 0; - } - else if (arg_match(&arg, &flipuvarg, argi)) - flipuv = 1; - else if (arg_match(&arg, &noblitarg, argi)) - noblit = 1; - else if (arg_match(&arg, &progressarg, argi)) - progress = 1; - else if (arg_match(&arg, &limitarg, argi)) - stop_after = arg_parse_uint(&arg); - else if (arg_match(&arg, &postprocarg, argi)) - postproc = 1; - else if (arg_match(&arg, &md5arg, argi)) - do_md5 = 1; - else if (arg_match(&arg, &summaryarg, argi)) - summary = 1; - else if (arg_match(&arg, &threadsarg, argi)) - cfg.threads = arg_parse_uint(&arg); - else if (arg_match(&arg, &verbosearg, argi)) - quiet = 0; - -#if CONFIG_VP8_DECODER - else if (arg_match(&arg, &addnoise_level, argi)) - { - postproc = 1; - vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE; - vp8_pp_cfg.noise_level = arg_parse_uint(&arg); - } - else if (arg_match(&arg, &demacroblock_level, argi)) - { - postproc = 1; - vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK; - vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg); - } - else if (arg_match(&arg, &deblock, argi)) - { - postproc = 1; - vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK; - } - else if (arg_match(&arg, &mfqe, argi)) - { - postproc = 1; - vp8_pp_cfg.post_proc_flag |= VP8_MFQE; - } - else if (arg_match(&arg, &pp_debug_info, argi)) - { - unsigned int level = arg_parse_uint(&arg); - - postproc = 1; - vp8_pp_cfg.post_proc_flag &= ~0x7; - - if (level) - vp8_pp_cfg.post_proc_flag |= level; - } - else if (arg_match(&arg, &pp_disp_ref_frame, argi)) - { - unsigned int flags = arg_parse_int(&arg); - if (flags) - { - postproc = 1; - vp8_dbg_color_ref_frame = flags; - } - } - else if (arg_match(&arg, &pp_disp_mb_modes, argi)) - { - unsigned int flags = arg_parse_int(&arg); - if (flags) - { - postproc = 1; - vp8_dbg_color_mb_modes = flags; - } - } - else if (arg_match(&arg, &pp_disp_b_modes, argi)) - { - unsigned int flags = arg_parse_int(&arg); - if (flags) - { - postproc = 1; - vp8_dbg_color_b_modes = flags; - } - } - else if (arg_match(&arg, &pp_disp_mvs, argi)) - { - unsigned int flags = arg_parse_int(&arg); - if (flags) - { - postproc = 1; - vp8_dbg_display_mv = flags; - } - } - else if (arg_match(&arg, &error_concealment, argi)) - { - ec_enabled = 1; - } + struct input_ctx input = {0}; + int frames_corrupted = 0; + int dec_flags = 0; + + /* Parse command line */ + exec_name = argv_[0]; + argv = argv_dup(argc - 1, argv_ + 1); + + for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + memset(&arg, 0, sizeof(arg)); + arg.argv_step = 1; + + if (arg_match(&arg, &codecarg, argi)) { + int j, k = -1; + + for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++) + if (!strcmp(ifaces[j].name, arg.val)) + k = j; + + if (k >= 0) + iface = ifaces[k].iface(); + else + die("Error: Unrecognized argument (%s) to --codec\n", + arg.val); + } else if (arg_match(&arg, 
&outputfile, argi)) + outfile_pattern = arg.val; + else if (arg_match(&arg, &use_yv12, argi)) { + use_y4m = 0; + flipuv = 1; + } else if (arg_match(&arg, &use_i420, argi)) { + use_y4m = 0; + flipuv = 0; + } else if (arg_match(&arg, &flipuvarg, argi)) + flipuv = 1; + else if (arg_match(&arg, &noblitarg, argi)) + noblit = 1; + else if (arg_match(&arg, &progressarg, argi)) + progress = 1; + else if (arg_match(&arg, &limitarg, argi)) + stop_after = arg_parse_uint(&arg); + else if (arg_match(&arg, &skiparg, argi)) + arg_skip = arg_parse_uint(&arg); + else if (arg_match(&arg, &postprocarg, argi)) + postproc = 1; + else if (arg_match(&arg, &md5arg, argi)) + do_md5 = 1; + else if (arg_match(&arg, &summaryarg, argi)) + summary = 1; + else if (arg_match(&arg, &threadsarg, argi)) + cfg.threads = arg_parse_uint(&arg); + else if (arg_match(&arg, &verbosearg, argi)) + quiet = 0; + - #if CONFIG_VP9_DECODER ++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER + else if (arg_match(&arg, &addnoise_level, argi)) { + postproc = 1; + vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE; + vp8_pp_cfg.noise_level = arg_parse_uint(&arg); + } else if (arg_match(&arg, &demacroblock_level, argi)) { + postproc = 1; + vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK; + vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg); + } else if (arg_match(&arg, &deblock, argi)) { + postproc = 1; + vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK; + } else if (arg_match(&arg, &mfqe, argi)) { + postproc = 1; + vp8_pp_cfg.post_proc_flag |= VP8_MFQE; + } else if (arg_match(&arg, &pp_debug_info, argi)) { + unsigned int level = arg_parse_uint(&arg); + + postproc = 1; + vp8_pp_cfg.post_proc_flag &= ~0x7; + + if (level) + vp8_pp_cfg.post_proc_flag |= level; + } else if (arg_match(&arg, &pp_disp_ref_frame, argi)) { + unsigned int flags = arg_parse_int(&arg); + if (flags) { + postproc = 1; + vp8_dbg_color_ref_frame = flags; + } + } else if (arg_match(&arg, &pp_disp_mb_modes, argi)) { + unsigned int flags = arg_parse_int(&arg); + if (flags) { + postproc = 1; + vp8_dbg_color_mb_modes = flags; + } + } else if (arg_match(&arg, &pp_disp_b_modes, argi)) { + unsigned int flags = arg_parse_int(&arg); + if (flags) { + postproc = 1; + vp8_dbg_color_b_modes = flags; + } + } else if (arg_match(&arg, &pp_disp_mvs, argi)) { + unsigned int flags = arg_parse_int(&arg); + if (flags) { + postproc = 1; + vp8_dbg_display_mv = flags; + } + } else if (arg_match(&arg, &error_concealment, argi)) { + ec_enabled = 1; + } #endif - else - argj++; - } + else + argj++; + } - /* Check for unrecognized options */ - for (argi = argv; *argi; argi++) - if (argi[0][0] == '-' && strlen(argi[0]) > 1) - die("Error: Unrecognized option %s\n", *argi); + /* Check for unrecognized options */ + for (argi = argv; *argi; argi++) + if (argi[0][0] == '-' && strlen(argi[0]) > 1) + die("Error: Unrecognized option %s\n", *argi); - /* Handle non-option arguments */ - fn = argv[0]; + /* Handle non-option arguments */ + fn = argv[0]; - if (!fn) - usage_exit(); + if (!fn) + usage_exit(); - /* Open file */ - infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin); + /* Open file */ + infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin); - if (!infile) - { - fprintf(stderr, "Failed to open file '%s'", - strcmp(fn, "-") ? fn : "stdin"); - return EXIT_FAILURE; - } + if (!infile) { + fprintf(stderr, "Failed to open file '%s'", + strcmp(fn, "-") ? 
fn : "stdin"); + return EXIT_FAILURE; + } #if CONFIG_OS_SUPPORT - /* Make sure we don't dump to the terminal, unless forced to with -o - */ - if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit) - { - fprintf(stderr, - "Not dumping raw video to your terminal. Use '-o -' to " - "override.\n"); - return EXIT_FAILURE; - } + /* Make sure we don't dump to the terminal, unless forced to with -o - */ + if (!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit) { + fprintf(stderr, + "Not dumping raw video to your terminal. Use '-o -' to " + "override.\n"); + return EXIT_FAILURE; + } #endif - input.infile = infile; - if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den, - &fps_num)) - input.kind = IVF_FILE; - else if(file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num)) - input.kind = WEBM_FILE; - else if(file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num)) - input.kind = RAW_FILE; - else - { - fprintf(stderr, "Unrecognized input file type.\n"); - return EXIT_FAILURE; + input.infile = infile; + if (file_is_ivf(infile, &fourcc, &width, &height, &fps_den, + &fps_num)) + input.kind = IVF_FILE; + else if (file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num)) + input.kind = WEBM_FILE; + else if (file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num)) + input.kind = RAW_FILE; + else { + fprintf(stderr, "Unrecognized input file type.\n"); + return EXIT_FAILURE; + } + + /* If the output file is not set or doesn't have a sequence number in + * it, then we only open it once. + */ + outfile_pattern = outfile_pattern ? outfile_pattern : "-"; + single_file = 1; + { + const char *p = outfile_pattern; + do { + p = strchr(p, '%'); + if (p && p[1] >= '1' && p[1] <= '9') { + /* pattern contains sequence number, so it's not unique. */ + single_file = 0; + break; + } + if (p) + p++; + } while (p); + } + + if (single_file && !noblit) { + generate_filename(outfile_pattern, outfile, sizeof(outfile) - 1, + width, height, 0); + out = out_open(outfile, do_md5); + } + + if (use_y4m && !noblit) { + char buffer[128]; + if (!single_file) { + fprintf(stderr, "YUV4MPEG2 not supported with output patterns," + " try --i420 or --yv12.\n"); + return EXIT_FAILURE; } - /* If the output file is not set or doesn't have a sequence number in - * it, then we only open it once. - */ - outfile_pattern = outfile_pattern ? outfile_pattern : "-"; - single_file = 1; - { - const char *p = outfile_pattern; - do - { - p = strchr(p, '%'); - if(p && p[1] >= '1' && p[1] <= '9') - { - /* pattern contains sequence number, so it's not unique. */ - single_file = 0; - break; - } - if(p) - p++; - } while(p); + if (input.kind == WEBM_FILE) + if (webm_guess_framerate(&input, &fps_den, &fps_num)) { + fprintf(stderr, "Failed to guess framerate -- error parsing " + "webm file?\n"); + return EXIT_FAILURE; + } + + + /*Note: We can't output an aspect ratio here because IVF doesn't + store one, and neither does VP8. + That will have to wait until these tools support WebM natively.*/ + sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n", + "420jpeg", width, height, fps_num, fps_den, 'p'); + out_put(out, (unsigned char *)buffer, + (unsigned int)strlen(buffer), do_md5); + } + + /* Try to determine the codec from the fourcc. 
*/ + for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) + if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc) { + vpx_codec_iface_t *ivf_iface = ifaces[i].iface(); + + if (iface && iface != ivf_iface) + fprintf(stderr, "Notice -- IVF header indicates codec: %s\n", + ifaces[i].name); + else + iface = ivf_iface; + + break; } + dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) | + (ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0); + if (vpx_codec_dec_init(&decoder, iface ? iface : ifaces[0].iface(), &cfg, + dec_flags)) { + fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (!quiet) + fprintf(stderr, "%s\n", decoder.name); + - #if CONFIG_VP9_DECODER ++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER + + if (vp8_pp_cfg.post_proc_flag + && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) { + fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (vp8_dbg_color_ref_frame + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) { + fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (vp8_dbg_color_mb_modes + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) { + fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (vp8_dbg_color_b_modes + && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) { + fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } + + if (vp8_dbg_display_mv + && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) { + fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder)); + return EXIT_FAILURE; + } +#endif - if (use_y4m && !noblit) - { - char buffer[128]; - if (!single_file) - { - fprintf(stderr, "YUV4MPEG2 not supported with output patterns," - " try --i420 or --yv12.\n"); - return EXIT_FAILURE; - } - if(input.kind == WEBM_FILE) - if(webm_guess_framerate(&input, &fps_den, &fps_num)) - { - fprintf(stderr, "Failed to guess framerate -- error parsing " - "webm file?\n"); - return EXIT_FAILURE; - } - - - /*Note: We can't output an aspect ratio here because IVF doesn't - store one, and neither does VP8. - That will have to wait until these tools support WebM natively.*/ - sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n", - "420jpeg", width, height, fps_num, fps_den, 'p'); - out_put(out, (unsigned char *)buffer, - (unsigned int)strlen(buffer), do_md5); - } + if (arg_skip) + fprintf(stderr, "Skipping first %d frames.\n", arg_skip); + while (arg_skip) { + if (read_frame(&input, &buf, &buf_sz, &buf_alloc_sz)) + break; + arg_skip--; + } - /* Try to determine the codec from the fourcc. 
*/ - for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) - if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc) - { - vpx_codec_iface_t *ivf_iface = ifaces[i].iface; + /* Decode file */ + while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz)) { + vpx_codec_iter_t iter = NULL; + vpx_image_t *img; + struct vpx_usec_timer timer; + int corrupted; - if (iface && iface != ivf_iface) - fprintf(stderr, "Notice -- IVF header indicates codec: %s\n", - ifaces[i].name); - else - iface = ivf_iface; + vpx_usec_timer_start(&timer); - break; - } + if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) { + const char *detail = vpx_codec_error_detail(&decoder); + fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder)); - dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) | - (ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0); - if (vpx_codec_dec_init(&decoder, iface ? iface : ifaces[0].iface, &cfg, - dec_flags)) - { - fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; + if (detail) + fprintf(stderr, " Additional information: %s\n", detail); + + goto fail; } - if (!quiet) - fprintf(stderr, "%s\n", decoder.name); + vpx_usec_timer_mark(&timer); + dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); -#if CONFIG_VP8_DECODER + ++frame_in; - if (vp8_pp_cfg.post_proc_flag - && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) - { - fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; + if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted)) { + fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n", + vpx_codec_error(&decoder)); + goto fail; } + frames_corrupted += corrupted; - if (vp8_dbg_color_ref_frame - && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) - { - fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } + vpx_usec_timer_start(&timer); - if (vp8_dbg_color_mb_modes - && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) - { - fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } + if ((img = vpx_codec_get_frame(&decoder, &iter))) + ++frame_out; - if (vp8_dbg_color_b_modes - && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) - { - fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } + vpx_usec_timer_mark(&timer); + dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); - if (vp8_dbg_display_mv - && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) - { - fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } -#endif + if (progress) + show_progress(frame_in, frame_out, dx_time); - /* Decode file */ - while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz)) - { - vpx_codec_iter_t iter = NULL; - vpx_image_t *img; - struct vpx_usec_timer timer; - int corrupted; + if (!noblit) { + if (img) { + unsigned int y; + char out_fn[PATH_MAX]; + uint8_t *buf; - vpx_usec_timer_start(&timer); + if (!single_file) { + size_t len = sizeof(out_fn) - 1; - if (vpx_codec_decode(&decoder, buf, (unsigned int)buf_sz, NULL, 0)) - { - const char *detail = vpx_codec_error_detail(&decoder); - fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder)); + 
out_fn[len] = '\0'; + generate_filename(outfile_pattern, out_fn, len - 1, + img->d_w, img->d_h, frame_in); + out = out_open(out_fn, do_md5); + } else if (use_y4m) + out_put(out, (unsigned char *)"FRAME\n", 6, do_md5); - if (detail) - fprintf(stderr, " Additional information: %s\n", detail); + buf = img->planes[VPX_PLANE_Y]; - goto fail; + for (y = 0; y < img->d_h; y++) { + out_put(out, buf, img->d_w, do_md5); + buf += img->stride[VPX_PLANE_Y]; } - vpx_usec_timer_mark(&timer); - dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); + buf = img->planes[flipuv ? VPX_PLANE_V : VPX_PLANE_U]; - ++frame_in; - - if (vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted)) - { - fprintf(stderr, "Failed VP8_GET_FRAME_CORRUPTED: %s\n", - vpx_codec_error(&decoder)); - goto fail; + for (y = 0; y < (1 + img->d_h) / 2; y++) { + out_put(out, buf, (1 + img->d_w) / 2, do_md5); + buf += img->stride[VPX_PLANE_U]; } - frames_corrupted += corrupted; - - vpx_usec_timer_start(&timer); - - if ((img = vpx_codec_get_frame(&decoder, &iter))) - ++frame_out; - - vpx_usec_timer_mark(&timer); - dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); - - if (progress) - show_progress(frame_in, frame_out, dx_time); - - if (!noblit) - { - if (img) - { - unsigned int y; - char out_fn[PATH_MAX]; - uint8_t *buf; - - if (!single_file) - { - size_t len = sizeof(out_fn)-1; - - out_fn[len] = '\0'; - generate_filename(outfile_pattern, out_fn, len-1, - img->d_w, img->d_h, frame_in); - out = out_open(out_fn, do_md5); - } - else if(use_y4m) - out_put(out, (unsigned char *)"FRAME\n", 6, do_md5); - - buf = img->planes[VPX_PLANE_Y]; - - for (y = 0; y < img->d_h; y++) - { - out_put(out, buf, img->d_w, do_md5); - buf += img->stride[VPX_PLANE_Y]; - } - - buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U]; - - for (y = 0; y < (1 + img->d_h) / 2; y++) - { - out_put(out, buf, (1 + img->d_w) / 2, do_md5); - buf += img->stride[VPX_PLANE_U]; - } - - buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V]; - - for (y = 0; y < (1 + img->d_h) / 2; y++) - { - out_put(out, buf, (1 + img->d_w) / 2, do_md5); - buf += img->stride[VPX_PLANE_V]; - } - - if (!single_file) - out_close(out, out_fn, do_md5); - } + + buf = img->planes[flipuv ? 
VPX_PLANE_U : VPX_PLANE_V]; + + for (y = 0; y < (1 + img->d_h) / 2; y++) { + out_put(out, buf, (1 + img->d_w) / 2, do_md5); + buf += img->stride[VPX_PLANE_V]; } - if (stop_after && frame_in >= stop_after) - break; + if (!single_file) + out_close(out, out_fn, do_md5); + } } - if (summary || progress) - { - show_progress(frame_in, frame_out, dx_time); - fprintf(stderr, "\n"); - } + if (stop_after && frame_in >= stop_after) + break; + } - if (frames_corrupted) - fprintf(stderr, "WARNING: %d frames corrupted.\n",frames_corrupted); + if (summary || progress) { + show_progress(frame_in, frame_out, dx_time); + fprintf(stderr, "\n"); + } + + if (frames_corrupted) + fprintf(stderr, "WARNING: %d frames corrupted.\n", frames_corrupted); fail: diff --cc vpxenc.c index 484eb16c3,7449e6c6c..6688231d7 --- a/vpxenc.c +++ b/vpxenc.c @@@ -31,14 -32,7 +31,14 @@@ #include #include #endif + - #if CONFIG_VP9_ENCODER ++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" +#endif - #if CONFIG_VP9_DECODER ++#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER +#include "vpx/vp8dx.h" +#endif + #include "vpx_ports/mem_ops.h" #include "vpx_ports/vpx_timer.h" #include "tools_common.h" @@@ -86,17 -82,15 +86,21 @@@ static size_t wrap_fwrite(const void *p static const char *exec_name; -static const struct codec_item -{ - char const *name; - vpx_codec_iface_t *iface; - unsigned int fourcc; -} codecs[] = -{ -#if CONFIG_VP8_ENCODER - {"vp8", &vpx_codec_vp8_cx_algo, 0x30385056}, +static const struct codec_item { + char const *name; + const vpx_codec_iface_t *(*iface)(void); + const vpx_codec_iface_t *(*dx_iface)(void); + unsigned int fourcc; +} codecs[] = { ++#if CONFIG_VP8_ENCODER && CONFIG_VP8_DECODER ++ {"vp8", &vpx_codec_vp8_cx, &vpx_codec_vp8_dx, 0x30385056}, ++#elif CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER ++ {"vp8", &vpx_codec_vp8_cx, NULL, 0x30385056}, ++#endif +#if CONFIG_VP9_ENCODER && CONFIG_VP9_DECODER + {"vp9", &vpx_codec_vp8_cx, &vpx_codec_vp8_dx, 0x30385056}, - #endif - #if CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER ++#elif CONFIG_VP9_ENCODER && !CONFIG_VP9_DECODER + {"vp9", &vpx_codec_vp8_cx, NULL, 0x30385056}, #endif }; @@@ -1054,104 -1113,96 +1058,104 @@@ static const arg_def_t *kf_args[] = }; - #if CONFIG_VP9_ENCODER -#if CONFIG_VP8_ENCODER ++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER static const arg_def_t noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1, - "Noise sensitivity (frames to blur)"); + "Noise sensitivity (frames to blur)"); static const arg_def_t sharpness = ARG_DEF(NULL, "sharpness", 1, - "Filter sharpness (0-7)"); + "Filter sharpness (0-7)"); static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1, - "Motion detection threshold"); + "Motion detection threshold"); #endif - #if CONFIG_VP9_ENCODER -#if CONFIG_VP8_ENCODER ++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER static const arg_def_t cpu_used = ARG_DEF(NULL, "cpu-used", 1, - "CPU Used (-16..16)"); + "CPU Used (-16..16)"); #endif - #if CONFIG_VP9_ENCODER -#if CONFIG_VP8_ENCODER ++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1, - "Number of token partitions to use, log2"); + "Number of token partitions to use, log2"); static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1, - "Enable automatic alt reference frames"); + "Enable automatic alt reference frames"); static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1, - "AltRef Max Frames"); + "AltRef Max Frames"); static const arg_def_t arnr_strength = ARG_DEF(NULL, 
"arnr-strength", 1, - "AltRef Strength"); + "AltRef Strength"); static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1, - "AltRef Type"); + "AltRef Type"); static const struct arg_enum_list tuning_enum[] = { - {"psnr", VP8_TUNE_PSNR}, - {"ssim", VP8_TUNE_SSIM}, - {NULL, 0} + {"psnr", VP8_TUNE_PSNR}, + {"ssim", VP8_TUNE_SSIM}, + {NULL, 0} }; static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1, - "Material to favor", tuning_enum); + "Material to favor", tuning_enum); static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1, - "Constrained Quality Level"); + "Constrained Quality Level"); static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1, - "Max I-frame bitrate (pct)"); + "Max I-frame bitrate (pct)"); +#if CONFIG_LOSSLESS +static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode"); +#endif -static const arg_def_t *vp8_args[] = -{ - &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, - &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, - &tune_ssim, &cq_level, &max_intra_rate_pct, NULL +static const arg_def_t *vp8_args[] = { + &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, + &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, + &tune_ssim, &cq_level, &max_intra_rate_pct, +#if CONFIG_LOSSLESS + &lossless, +#endif + NULL }; -static const int vp8_arg_ctrl_map[] = -{ - VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, - VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, - VP8E_SET_TOKEN_PARTITIONS, - VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE, - VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, 0 +static const int vp8_arg_ctrl_map[] = { + VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, + VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, + VP8E_SET_TOKEN_PARTITIONS, + VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, + VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, +#if CONFIG_LOSSLESS + VP9E_SET_LOSSLESS, +#endif + 0 }; #endif static const arg_def_t *no_args[] = { NULL }; -static void usage_exit() -{ - int i; - - fprintf(stderr, "Usage: %s -o dst_filename src_filename \n", - exec_name); - - fprintf(stderr, "\nOptions:\n"); - arg_show_usage(stdout, main_args); - fprintf(stderr, "\nEncoder Global Options:\n"); - arg_show_usage(stdout, global_args); - fprintf(stderr, "\nRate Control Options:\n"); - arg_show_usage(stdout, rc_args); - fprintf(stderr, "\nTwopass Rate Control Options:\n"); - arg_show_usage(stdout, rc_twopass_args); - fprintf(stderr, "\nKeyframe Placement Options:\n"); - arg_show_usage(stdout, kf_args); -#if CONFIG_VP8_ENCODER - fprintf(stderr, "\nVP8 Specific Options:\n"); - arg_show_usage(stdout, vp8_args); +static void usage_exit() { + int i; + + fprintf(stderr, "Usage: %s -o dst_filename src_filename \n", + exec_name); + + fprintf(stderr, "\nOptions:\n"); + arg_show_usage(stdout, main_args); + fprintf(stderr, "\nEncoder Global Options:\n"); + arg_show_usage(stdout, global_args); + fprintf(stderr, "\nRate Control Options:\n"); + arg_show_usage(stdout, rc_args); + fprintf(stderr, "\nTwopass Rate Control Options:\n"); + arg_show_usage(stdout, rc_twopass_args); + fprintf(stderr, "\nKeyframe Placement Options:\n"); + arg_show_usage(stdout, kf_args); - #if CONFIG_VP9_ENCODER ++#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER + fprintf(stderr, "\nVP8 Specific Options:\n"); + arg_show_usage(stdout, vp8_args); #endif - fprintf(stderr, "\nStream timebase 
(--timebase):\n" - " The desired precision of timestamps in the output, expressed\n" - " in fractional seconds. Default is 1/1000.\n"); - fprintf(stderr, "\n" - "Included encoders:\n" - "\n"); - - for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++) - fprintf(stderr, " %-6s - %s\n", - codecs[i].name, - vpx_codec_iface_name(codecs[i].iface)); - - exit(EXIT_FAILURE); + fprintf(stderr, "\nStream timebase (--timebase):\n" + " The desired precision of timestamps in the output, expressed\n" + " in fractional seconds. Default is 1/1000.\n"); + fprintf(stderr, "\n" + "Included encoders:\n" + "\n"); + + for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++) + fprintf(stderr, " %-6s - %s\n", + codecs[i].name, + vpx_codec_iface_name(codecs[i].iface())); + + exit(EXIT_FAILURE); } @@@ -1725,139 -1790,159 +1729,139 @@@ static struct stream_state *new_stream( static int parse_stream_params(struct global_config *global, struct stream_state *stream, - char **argv) -{ - char **argi, **argj; - struct arg arg; - static const arg_def_t **ctrl_args = no_args; - static const int *ctrl_args_map = NULL; - struct stream_config *config = &stream->config; - int eos_mark_found = 0; - - /* Handle codec specific options */ - if (global->codec->iface == &vpx_codec_vp8_cx_algo) - { - ctrl_args = vp8_args; - ctrl_args_map = vp8_arg_ctrl_map; + char **argv) { + char **argi, **argj; + struct arg arg; + static const arg_def_t **ctrl_args = no_args; + static const int *ctrl_args_map = NULL; + struct stream_config *config = &stream->config; + int eos_mark_found = 0; + + /* Handle codec specific options */ - if (global->codec->iface == vpx_codec_vp8x_cx) { ++ if (global->codec->iface == vpx_codec_vp8_cx) { + ctrl_args = vp8_args; + ctrl_args_map = vp8_arg_ctrl_map; + } + + for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + arg.argv_step = 1; + + /* Once we've found an end-of-stream marker (--) we want to continue + * shifting arguments but not consuming them. + */ + if (eos_mark_found) { + argj++; + continue; + } else if (!strcmp(*argj, "--")) { + eos_mark_found = 1; + continue; } - for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) - { - arg.argv_step = 1; - - /* Once we've found an end-of-stream marker (--) we want to continue - * shifting arguments but not consuming them. 
- */ - if (eos_mark_found) - { - argj++; - continue; - } - else if (!strcmp(*argj, "--")) - { - eos_mark_found = 1; - continue; - } - - if (0); - else if (arg_match(&arg, &outputfile, argi)) - config->out_fn = arg.val; - else if (arg_match(&arg, &fpf_name, argi)) - config->stats_fn = arg.val; - else if (arg_match(&arg, &use_ivf, argi)) - config->write_webm = 0; - else if (arg_match(&arg, &threads, argi)) - config->cfg.g_threads = arg_parse_uint(&arg); - else if (arg_match(&arg, &profile, argi)) - config->cfg.g_profile = arg_parse_uint(&arg); - else if (arg_match(&arg, &width, argi)) - config->cfg.g_w = arg_parse_uint(&arg); - else if (arg_match(&arg, &height, argi)) - config->cfg.g_h = arg_parse_uint(&arg); - else if (arg_match(&arg, &stereo_mode, argi)) - config->stereo_fmt = arg_parse_enum_or_int(&arg); - else if (arg_match(&arg, &timebase, argi)) - { - config->cfg.g_timebase = arg_parse_rational(&arg); - validate_positive_rational(arg.name, &config->cfg.g_timebase); - } - else if (arg_match(&arg, &error_resilient, argi)) - config->cfg.g_error_resilient = arg_parse_uint(&arg); - else if (arg_match(&arg, &lag_in_frames, argi)) - config->cfg.g_lag_in_frames = arg_parse_uint(&arg); - else if (arg_match(&arg, &dropframe_thresh, argi)) - config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg); - else if (arg_match(&arg, &resize_allowed, argi)) - config->cfg.rc_resize_allowed = arg_parse_uint(&arg); - else if (arg_match(&arg, &resize_up_thresh, argi)) - config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg); - else if (arg_match(&arg, &resize_down_thresh, argi)) - config->cfg.rc_resize_down_thresh = arg_parse_uint(&arg); - else if (arg_match(&arg, &end_usage, argi)) - config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg); - else if (arg_match(&arg, &target_bitrate, argi)) - config->cfg.rc_target_bitrate = arg_parse_uint(&arg); - else if (arg_match(&arg, &min_quantizer, argi)) - config->cfg.rc_min_quantizer = arg_parse_uint(&arg); - else if (arg_match(&arg, &max_quantizer, argi)) - config->cfg.rc_max_quantizer = arg_parse_uint(&arg); - else if (arg_match(&arg, &undershoot_pct, argi)) - config->cfg.rc_undershoot_pct = arg_parse_uint(&arg); - else if (arg_match(&arg, &overshoot_pct, argi)) - config->cfg.rc_overshoot_pct = arg_parse_uint(&arg); - else if (arg_match(&arg, &buf_sz, argi)) - config->cfg.rc_buf_sz = arg_parse_uint(&arg); - else if (arg_match(&arg, &buf_initial_sz, argi)) - config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg); - else if (arg_match(&arg, &buf_optimal_sz, argi)) - config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg); - else if (arg_match(&arg, &bias_pct, argi)) - { - config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg); - - if (global->passes < 2) - warn("option %s ignored in one-pass mode.\n", arg.name); - } - else if (arg_match(&arg, &minsection_pct, argi)) - { - config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg); - - if (global->passes < 2) - warn("option %s ignored in one-pass mode.\n", arg.name); - } - else if (arg_match(&arg, &maxsection_pct, argi)) - { - config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg); + if (0); + else if (arg_match(&arg, &outputfile, argi)) + config->out_fn = arg.val; + else if (arg_match(&arg, &fpf_name, argi)) + config->stats_fn = arg.val; + else if (arg_match(&arg, &use_ivf, argi)) + config->write_webm = 0; + else if (arg_match(&arg, &threads, argi)) + config->cfg.g_threads = arg_parse_uint(&arg); + else if (arg_match(&arg, &profile, argi)) + config->cfg.g_profile = arg_parse_uint(&arg); + else if 
(arg_match(&arg, &width, argi)) + config->cfg.g_w = arg_parse_uint(&arg); + else if (arg_match(&arg, &height, argi)) + config->cfg.g_h = arg_parse_uint(&arg); + else if (arg_match(&arg, &stereo_mode, argi)) + config->stereo_fmt = arg_parse_enum_or_int(&arg); + else if (arg_match(&arg, &timebase, argi)) { + config->cfg.g_timebase = arg_parse_rational(&arg); + validate_positive_rational(arg.name, &config->cfg.g_timebase); + } else if (arg_match(&arg, &error_resilient, argi)) + config->cfg.g_error_resilient = arg_parse_uint(&arg); + else if (arg_match(&arg, &lag_in_frames, argi)) + config->cfg.g_lag_in_frames = arg_parse_uint(&arg); + else if (arg_match(&arg, &dropframe_thresh, argi)) + config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg); + else if (arg_match(&arg, &resize_allowed, argi)) + config->cfg.rc_resize_allowed = arg_parse_uint(&arg); + else if (arg_match(&arg, &resize_up_thresh, argi)) + config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg); + else if (arg_match(&arg, &resize_down_thresh, argi)) + config->cfg.rc_resize_down_thresh = arg_parse_uint(&arg); + else if (arg_match(&arg, &end_usage, argi)) + config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg); + else if (arg_match(&arg, &target_bitrate, argi)) + config->cfg.rc_target_bitrate = arg_parse_uint(&arg); + else if (arg_match(&arg, &min_quantizer, argi)) + config->cfg.rc_min_quantizer = arg_parse_uint(&arg); + else if (arg_match(&arg, &max_quantizer, argi)) + config->cfg.rc_max_quantizer = arg_parse_uint(&arg); + else if (arg_match(&arg, &undershoot_pct, argi)) + config->cfg.rc_undershoot_pct = arg_parse_uint(&arg); + else if (arg_match(&arg, &overshoot_pct, argi)) + config->cfg.rc_overshoot_pct = arg_parse_uint(&arg); + else if (arg_match(&arg, &buf_sz, argi)) + config->cfg.rc_buf_sz = arg_parse_uint(&arg); + else if (arg_match(&arg, &buf_initial_sz, argi)) + config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg); + else if (arg_match(&arg, &buf_optimal_sz, argi)) + config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg); + else if (arg_match(&arg, &bias_pct, argi)) { + config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg); + + if (global->passes < 2) + warn("option %s ignored in one-pass mode.\n", arg.name); + } else if (arg_match(&arg, &minsection_pct, argi)) { + config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg); + + if (global->passes < 2) + warn("option %s ignored in one-pass mode.\n", arg.name); + } else if (arg_match(&arg, &maxsection_pct, argi)) { + config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg); + + if (global->passes < 2) + warn("option %s ignored in one-pass mode.\n", arg.name); + } else if (arg_match(&arg, &kf_min_dist, argi)) + config->cfg.kf_min_dist = arg_parse_uint(&arg); + else if (arg_match(&arg, &kf_max_dist, argi)) { + config->cfg.kf_max_dist = arg_parse_uint(&arg); + config->have_kf_max_dist = 1; + } else if (arg_match(&arg, &kf_disabled, argi)) + config->cfg.kf_mode = VPX_KF_DISABLED; + else { + int i, match = 0; + + for (i = 0; ctrl_args[i]; i++) { + if (arg_match(&arg, ctrl_args[i], argi)) { + int j; + match = 1; + + /* Point either to the next free element or the first + * instance of this control. 
+ */ + for (j = 0; j < config->arg_ctrl_cnt; j++) + if (config->arg_ctrls[j][0] == ctrl_args_map[i]) + break; + + /* Update/insert */ + assert(j < ARG_CTRL_CNT_MAX); + if (j < ARG_CTRL_CNT_MAX) { + config->arg_ctrls[j][0] = ctrl_args_map[i]; + config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg); + if (j == config->arg_ctrl_cnt) + config->arg_ctrl_cnt++; + } - if (global->passes < 2) - warn("option %s ignored in one-pass mode.\n", arg.name); - } - else if (arg_match(&arg, &kf_min_dist, argi)) - config->cfg.kf_min_dist = arg_parse_uint(&arg); - else if (arg_match(&arg, &kf_max_dist, argi)) - { - config->cfg.kf_max_dist = arg_parse_uint(&arg); - config->have_kf_max_dist = 1; } - else if (arg_match(&arg, &kf_disabled, argi)) - config->cfg.kf_mode = VPX_KF_DISABLED; - else - { - int i, match = 0; - - for (i = 0; ctrl_args[i]; i++) - { - if (arg_match(&arg, ctrl_args[i], argi)) - { - int j; - match = 1; - - /* Point either to the next free element or the first - * instance of this control. - */ - for(j=0; jarg_ctrl_cnt; j++) - if(config->arg_ctrls[j][0] == ctrl_args_map[i]) - break; - - /* Update/insert */ - assert(j < ARG_CTRL_CNT_MAX); - if (j < ARG_CTRL_CNT_MAX) - { - config->arg_ctrls[j][0] = ctrl_args_map[i]; - config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg); - if(j == config->arg_ctrl_cnt) - config->arg_ctrl_cnt++; - } - - } - } + } - if (!match) - argj++; - } + if (!match) + argj++; } + } - return eos_mark_found; + return eos_mark_found; }
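
One structural change running through the vpxdec.c and vpxenc.c hunks above is worth spelling out: instead of linking against exported interface objects such as vpx_codec_vp8_dx_algo, both tools now store an interface *getter* function (vpx_codec_vp8_dx / vpx_codec_vp8_cx) in their codec tables and call it at lookup time, which is what lets one binary register VP8, VP9, or both depending on the configure flags. The following stand-alone sketch is illustrative only and is not part of the diff: fake_iface_t, fake_iface_vp8(), and iface_for_fourcc() are hypothetical stand-ins for the opaque vpx_codec_iface_t, its getter, and the probe loop; only the table shape, the VP8 fourcc constants, and the masked-fourcc comparison are taken from the ifaces[] code in vpxdec.c above.

#include <stdio.h>

/* Hypothetical stand-in for the opaque vpx_codec_iface_t. */
typedef struct { const char *desc; } fake_iface_t;

/* Post-merge style: the interface comes from a getter function
 * (cf. vpx_codec_vp8_dx()) rather than from a global ..._algo object. */
static const fake_iface_t *fake_iface_vp8(void) {
  static const fake_iface_t iface = { "VP8 decoder (stand-in)" };
  return &iface;
}

static const struct {
  const char *name;
  const fake_iface_t *(*iface)(void);  /* called lazily: ifaces[i].iface() */
  unsigned int fourcc;
  unsigned int fourcc_mask;
} ifaces[] = {
  { "vp8", fake_iface_vp8, 0x00385056 /* VP8_FOURCC */, 0x00FFFFFF },
};

/* Same probe as in vpxdec.c: mask the stream fourcc and return the first
 * matching interface, or NULL when no compiled-in codec matches. */
static const fake_iface_t *iface_for_fourcc(unsigned int fourcc) {
  size_t i;
  for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
    if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
      return ifaces[i].iface();
  return NULL;
}

int main(void) {
  /* 0x30385056 is the "VP80" fourcc written by the encoder; the
   * 0x00FFFFFF mask reduces it to VP8_FOURCC, so the vp8 row matches. */
  const fake_iface_t *iface = iface_for_fourcc(0x30385056);
  printf("%s\n", iface ? iface->desc : "unknown fourcc");
  return 0;
}

Routing every lookup through a function call also explains why this commit can leave VP9 temporarily exporting its symbols under the VP8 names: the table entry only records which getter to call and which fourcc to match, so swapping the underlying codec later does not change any of the selection logic shown here.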