Merge commit 'refs/changes/09/809/1' of https://review.webmproject.org/p/libvpx

author Jim Bankoski <jimbankoski@google.com>

Tue, 26 Oct 2010 11:34:57 +0000 (07:34 -0400)

committer Jim Bankoski <jimbankoski@google.com>

Tue, 26 Oct 2010 11:34:57 +0000 (07:34 -0400)
author Jim Bankoski <jimbankoski@google.com>
Tue, 26 Oct 2010 11:34:57 +0000 (07:34 -0400)
committer Jim Bankoski <jimbankoski@google.com>
Tue, 26 Oct 2010 11:34:57 +0000 (07:34 -0400)
diff --git a/args.c b/args.c

index 5365e91203e032438a58462f416b375f0581cc51..782929022a7137aa2ae041beef2bfe5978178524 100644 (file)
--- a/args.c
+++ b/args.c
@@ -120,9 +120,13 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
          char *long_val = def->has_val ? "=<arg>" : "";
  
          if (def->short_name && def->long_name)
-            snprintf(option_text, 37, "-%s%s, --%s%s",
-                     def->short_name, short_val,
+        {
+            char *comma = def->has_val ? "," : ",      ";
+
+            snprintf(option_text, 37, "-%s%s%s --%s%6s",
+                     def->short_name, short_val, comma,
                       def->long_name, long_val);
+        }
          else if (def->short_name)
              snprintf(option_text, 37, "-%s%s",
                       def->short_name, short_val);
diff --git a/build/make/Makefile b/build/make/Makefile

index 1ca747a2615bef08b6b23a09f1893dca210b772c..40fa6d50c46320b7fd358cde5262c9a28b0cd511 100755 (executable)
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -65,7 +65,7 @@ endif
  BUILD_ROOT?=.
  VPATH=$(SRC_PATH_BARE)
  CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
-ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
+ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/
  DIST_DIR?=dist
  HOSTCC?=gcc
  TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
diff --git a/build/make/configure.sh b/build/make/configure.sh

index e20f0d133a787a62d9707a60a2bfabf8f0c9e427..cdd55ebd851ec507af6b38156721e5fa564db3e9 100755 (executable)
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -532,6 +532,9 @@ process_common_toolchain() {
              *powerpc*)
                  tgt_isa=ppc32
                  ;;
+            *sparc*)
+                tgt_isa=sparc
+                ;;
          esac
  
          # detect tgt_os
@@ -551,6 +554,9 @@ process_common_toolchain() {
              *linux*|*bsd*)
                  tgt_os=linux
                  ;;
+            *solaris2.10)
+                tgt_os=solaris
+                ;;
          esac
  
          if [ -n "$tgt_isa" ] && [ -n "$tgt_os" ]; then
@@ -602,6 +608,13 @@ process_common_toolchain() {
              ;;
      esac
  
+    # Handle Solaris variants. Solaris 10 needs -lposix4
+    case ${toolchain} in
+        *-solaris-*)
+            add_extralibs -lposix4
+            ;;
+    esac
+
      # Process ARM architecture variants
      case ${toolchain} in
      arm*|iwmmxt*)
diff --git a/configure b/configure

index 0321e1abfaf34c3b5ba0285f871283dd119bfb66..39ef83ffa1327db765fbc02ff826cdbc60089efd 100755 (executable)
--- a/configure
+++ b/configure
@@ -101,6 +101,7 @@ all_platforms="${all_platforms} ppc32-linux-gcc"
  all_platforms="${all_platforms} ppc64-darwin8-gcc"
  all_platforms="${all_platforms} ppc64-darwin9-gcc"
  all_platforms="${all_platforms} ppc64-linux-gcc"
+all_platforms="${all_platforms} sparc-solaris-gcc"
  all_platforms="${all_platforms} x86-darwin8-gcc"
  all_platforms="${all_platforms} x86-darwin8-icc"
  all_platforms="${all_platforms} x86-darwin9-gcc"
diff --git a/examples.mk b/examples.mk

index 00ffc70378308fbff978b55e7bf1b5547ecc83f1..28fc6eaa9d81a54ea77c0545c8cf5977e9ccd2d0 100644 (file)
--- a/examples.mk
+++ b/examples.mk
@@ -12,19 +12,38 @@
  # List of examples to build. UTILS are files that are taken from the source
  # tree directly, and GEN_EXAMPLES are files that are created from the
  # examples folder.
-UTILS-$(CONFIG_DECODERS)    += ivfdec.c
-ivfdec.SRCS                 += md5_utils.c md5_utils.h
-ivfdec.SRCS                 += vpx_ports/vpx_timer.h
-ivfdec.SRCS                 += vpx/vpx_integer.h
-ivfdec.SRCS                 += args.c args.h vpx_ports/config.h
-ivfdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
-ivfdec.DESCRIPTION           = Full featured decoder
-UTILS-$(CONFIG_ENCODERS)    += ivfenc.c
-ivfenc.SRCS                 += args.c args.h y4minput.c y4minput.h
-ivfenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
-ivfenc.SRCS                 += vpx_ports/mem_ops_aligned.h
-ivfenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
-ivfenc.DESCRIPTION           = Full featured encoder
+UTILS-$(CONFIG_DECODERS)    += vpxdec.c
+vpxdec.SRCS                 += md5_utils.c md5_utils.h
+vpxdec.SRCS                 += vpx_ports/vpx_timer.h
+vpxdec.SRCS                 += vpx/vpx_integer.h
+vpxdec.SRCS                 += args.c args.h vpx_ports/config.h
+vpxdec.SRCS                 += nestegg/halloc/halloc.h
+vpxdec.SRCS                 += nestegg/halloc/src/align.h
+vpxdec.SRCS                 += nestegg/halloc/src/halloc.c
+vpxdec.SRCS                 += nestegg/halloc/src/hlist.h
+vpxdec.SRCS                 += nestegg/halloc/src/macros.h
+vpxdec.SRCS                 += nestegg/include/nestegg/nestegg.h
+vpxdec.SRCS                 += nestegg/src/nestegg.c
+vpxdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
+vpxdec.DESCRIPTION           = Full featured decoder
+UTILS-$(CONFIG_ENCODERS)    += vpxenc.c
+vpxenc.SRCS                 += args.c args.h y4minput.c y4minput.h
+vpxenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
+vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
+vpxenc.SRCS                 += libmkv/EbmlIDs.h
+vpxenc.SRCS                 += libmkv/EbmlWriter.c
+vpxenc.SRCS                 += libmkv/EbmlWriter.h
+vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
+vpxenc.DESCRIPTION           = Full featured encoder
+
+# Clean up old ivfenc, ivfdec binaries.
+ifeq ($(CONFIG_MSVS),yes)
+CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfenc.exe)
+CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfdec.exe)
+else
+CLEAN-OBJS += ivfenc{.c.o,.c.d,.dox,.exe}
+CLEAN-OBJS += ivfdec{.c.o,.c.d,.dox,.exe}
+endif
  
  # XMA example disabled for now, not used in VP8
  #UTILS-$(CONFIG_DECODERS)    += example_xma.c
diff --git a/ivfdec.c b/ivfdec.c

deleted file mode 100644 (file)

index 8065e70..0000000
--- a/ivfdec.c
+++ /dev/null
@@ -1,640 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This is a simple program that reads ivf files and decodes them
- * using the new interface. Decoded frames are output as YV12 raw.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_config.h"
-#include "vpx/vpx_decoder.h"
-#include "vpx_ports/vpx_timer.h"
-#if CONFIG_VP8_DECODER
-#include "vpx/vp8dx.h"
-#endif
-#if CONFIG_MD5
-#include "md5_utils.h"
-#endif
-
-static const char *exec_name;
-
-static const struct
-{
-    char const *name;
-    const vpx_codec_iface_t *iface;
-    unsigned int             fourcc;
-    unsigned int             fourcc_mask;
-} ifaces[] =
-{
-#if CONFIG_VP8_DECODER
-    {"vp8",  &vpx_codec_vp8_dx_algo,   0x00385056, 0x00FFFFFF},
-#endif
-};
-
-#include "args.h"
-static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
-                                  "Codec to use");
-static const arg_def_t prefixarg = ARG_DEF("p", "prefix", 1,
-                                   "Prefix to use when saving frames");
-static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
-                                  "Output file is YV12 ");
-static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
-                                  "Output file is I420 (default)");
-static const arg_def_t flipuvarg = ARG_DEF(NULL, "flipuv", 0,
-                                   "Synonym for --yv12");
-static const arg_def_t noblitarg = ARG_DEF(NULL, "noblit", 0,
-                                   "Don't process the decoded frames");
-static const arg_def_t progressarg = ARG_DEF(NULL, "progress", 0,
-                                     "Show progress after each frame decodes");
-static const arg_def_t limitarg = ARG_DEF(NULL, "limit", 1,
-                                  "Stop decoding after n frames");
-static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0,
-                                     "Postprocess decoded frames");
-static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
-                                    "Show timing summary");
-static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
-                                    "Output raw yv12 file instead of images");
-static const arg_def_t usey4marg = ARG_DEF("y", "y4m", 0,
-                                    "Output file is YUV4MPEG2");
-static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
-                                    "Max threads to use");
-static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0,
-                                  "Suppress version string");
-
-#if CONFIG_MD5
-static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
-                                        "Compute the MD5 sum of the decoded frame");
-#endif
-static const arg_def_t *all_args[] =
-{
-    &codecarg, &prefixarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
-    &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
-    &usey4marg, &threadsarg, &quietarg,
-#if CONFIG_MD5
-    &md5arg,
-#endif
-    NULL
-};
-
-#if CONFIG_VP8_DECODER
-static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
-                                        "Enable VP8 postproc add noise");
-static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
-                                 "Enable VP8 deblocking");
-static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1,
-        "Enable VP8 demacroblocking, w/ level");
-static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
-                                       "Enable VP8 visible debug info");
-
-
-static const arg_def_t *vp8_pp_args[] =
-{
-    &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
-    NULL
-};
-#endif
-
-static void usage_exit()
-{
-    int i;
-
-    fprintf(stderr, "Usage: %s <options> filename\n\n"
-            "Options:\n", exec_name);
-    arg_show_usage(stderr, all_args);
-#if CONFIG_VP8_DECODER
-    fprintf(stderr, "\nVP8 Postprocessing Options:\n");
-    arg_show_usage(stderr, vp8_pp_args);
-#endif
-    fprintf(stderr, "\nIncluded decoders:\n\n");
-
-    for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
-        fprintf(stderr, "    %-6s - %s\n",
-                ifaces[i].name,
-                vpx_codec_iface_name(ifaces[i].iface));
-
-    exit(EXIT_FAILURE);
-}
-
-void die(const char *fmt, ...)
-{
-    va_list ap;
-    va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, "\n");
-    usage_exit();
-}
-
-static unsigned int mem_get_le16(const void *vmem)
-{
-    unsigned int  val;
-    const unsigned char *mem = (const unsigned char *)vmem;
-
-    val = mem[1] << 8;
-    val |= mem[0];
-    return val;
-}
-
-static unsigned int mem_get_le32(const void *vmem)
-{
-    unsigned int  val;
-    const unsigned char *mem = (const unsigned char *)vmem;
-
-    val = mem[3] << 24;
-    val |= mem[2] << 16;
-    val |= mem[1] << 8;
-    val |= mem[0];
-    return val;
-}
-
-#define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t))
-#define RAW_FRAME_HDR_SZ (sizeof(uint32_t))
-static int read_frame(FILE                  *infile,
-                      uint8_t               **buf,
-                      uint32_t              *buf_sz,
-                      uint32_t              *buf_alloc_sz,
-                      int                    is_ivf)
-{
-    char     raw_hdr[IVF_FRAME_HDR_SZ];
-    uint32_t new_buf_sz;
-
-    /* For both the raw and ivf formats, the frame size is the first 4 bytes
-     * of the frame header. We just need to special case on the header
-     * size.
-     */
-    if (fread(raw_hdr, is_ivf ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1,
-              infile) != 1)
-    {
-        if (!feof(infile))
-            fprintf(stderr, "Failed to read frame size\n");
-
-        new_buf_sz = 0;
-    }
-    else
-    {
-        new_buf_sz = mem_get_le32(raw_hdr);
-
-        if (new_buf_sz > 256 * 1024 * 1024)
-        {
-            fprintf(stderr, "Error: Read invalid frame size (%u)\n",
-                    new_buf_sz);
-            new_buf_sz = 0;
-        }
-
-        if (!is_ivf && new_buf_sz > 256 * 1024)
-            fprintf(stderr, "Warning: Read invalid frame size (%u)"
-                    " - not a raw file?\n", new_buf_sz);
-
-        if (new_buf_sz > *buf_alloc_sz)
-        {
-            uint8_t *new_buf = realloc(*buf, 2 * new_buf_sz);
-
-            if (new_buf)
-            {
-                *buf = new_buf;
-                *buf_alloc_sz = 2 * new_buf_sz;
-            }
-            else
-            {
-                fprintf(stderr, "Failed to allocate compressed data buffer\n");
-                new_buf_sz = 0;
-            }
-        }
-    }
-
-    *buf_sz = new_buf_sz;
-
-    if (*buf_sz)
-    {
-        if (fread(*buf, 1, *buf_sz, infile) != *buf_sz)
-        {
-            fprintf(stderr, "Failed to read full frame\n");
-            return 1;
-        }
-
-        return 0;
-    }
-
-    return 1;
-}
-
-void *out_open(const char *out_fn, int do_md5)
-{
-    void *out = NULL;
-
-    if (do_md5)
-    {
-#if CONFIG_MD5
-        MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
-        (void)out_fn;
-        MD5Init(md5_ctx);
-#endif
-    }
-    else
-    {
-        FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout;
-
-        if (!outfile)
-        {
-            fprintf(stderr, "Failed to output file");
-            exit(EXIT_FAILURE);
-        }
-    }
-
-    return out;
-}
-
-void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
-{
-    if (do_md5)
-    {
-#if CONFIG_MD5
-        MD5Update(out, buf, len);
-#endif
-    }
-    else
-    {
-        fwrite(buf, 1, len, out);
-    }
-}
-
-void out_close(void *out, const char *out_fn, int do_md5)
-{
-    if (do_md5)
-    {
-#if CONFIG_MD5
-        uint8_t md5[16];
-        int i;
-
-        MD5Final(md5, out);
-        free(out);
-
-        for (i = 0; i < 16; i++)
-            printf("%02x", md5[i]);
-
-        printf("  %s\n", out_fn);
-#endif
-    }
-    else
-    {
-        fclose(out);
-    }
-}
-
-unsigned int file_is_ivf(FILE *infile,
-                         unsigned int *fourcc,
-                         unsigned int *width,
-                         unsigned int *height,
-                         unsigned int *timebase_num,
-                         unsigned int *timebase_den)
-{
-    char raw_hdr[32];
-    int is_ivf = 0;
-
-    if (fread(raw_hdr, 1, 32, infile) == 32)
-    {
-        if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K'
-            && raw_hdr[2] == 'I' && raw_hdr[3] == 'F')
-        {
-            is_ivf = 1;
-
-            if (mem_get_le16(raw_hdr + 4) != 0)
-                fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
-                        " decode properly.");
-
-            *fourcc = mem_get_le32(raw_hdr + 8);
-            *width = mem_get_le16(raw_hdr + 12);
-            *height = mem_get_le16(raw_hdr + 14);
-            *timebase_den = mem_get_le32(raw_hdr + 16);
-            *timebase_num = mem_get_le32(raw_hdr + 20);
-        }
-    }
-
-    if (!is_ivf)
-        rewind(infile);
-
-    return is_ivf;
-}
-
-int main(int argc, const char **argv_)
-{
-    vpx_codec_ctx_t          decoder;
-    char                  *prefix = NULL, *fn = NULL;
-    int                    i;
-    uint8_t               *buf = NULL;
-    uint32_t               buf_sz = 0, buf_alloc_sz = 0;
-    FILE                  *infile;
-    int                    frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
-    int                    stop_after = 0, postproc = 0, summary = 0, quiet = 0;
-    vpx_codec_iface_t       *iface = NULL;
-    unsigned int           is_ivf, fourcc;
-    unsigned long          dx_time = 0;
-    struct arg               arg;
-    char                   **argv, **argi, **argj;
-    const char                   *fn2 = 0;
-    int                     use_y4m = 0;
-    unsigned int            width;
-    unsigned int            height;
-    unsigned int            timebase_num;
-    unsigned int            timebase_den;
-    void                   *out = NULL;
-    vpx_codec_dec_cfg_t     cfg = {0};
-#if CONFIG_VP8_DECODER
-    vp8_postproc_cfg_t      vp8_pp_cfg = {0};
-#endif
-
-    /* Parse command line */
-    exec_name = argv_[0];
-    argv = argv_dup(argc - 1, argv_ + 1);
-
-    for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
-    {
-        memset(&arg, 0, sizeof(arg));
-        arg.argv_step = 1;
-
-        if (arg_match(&arg, &codecarg, argi))
-        {
-            int j, k = -1;
-
-            for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
-                if (!strcmp(ifaces[j].name, arg.val))
-                    k = j;
-
-            if (k >= 0)
-                iface = ifaces[k].iface;
-            else
-                die("Error: Unrecognized argument (%s) to --codec\n",
-                    arg.val);
-        }
-        else if (arg_match(&arg, &outputfile, argi))
-            fn2 = arg.val;
-        else if (arg_match(&arg, &usey4marg, argi))
-            use_y4m = 1;
-        else if (arg_match(&arg, &prefixarg, argi))
-            prefix = strdup(arg.val);
-        else if (arg_match(&arg, &use_yv12, argi))
-            flipuv = 1;
-        else if (arg_match(&arg, &use_i420, argi))
-            flipuv = 0;
-        else if (arg_match(&arg, &flipuvarg, argi))
-            flipuv = 1;
-        else if (arg_match(&arg, &noblitarg, argi))
-            noblit = 1;
-        else if (arg_match(&arg, &progressarg, argi))
-            progress = 1;
-        else if (arg_match(&arg, &limitarg, argi))
-            stop_after = arg_parse_uint(&arg);
-        else if (arg_match(&arg, &postprocarg, argi))
-            postproc = 1;
-        else if (arg_match(&arg, &md5arg, argi))
-            do_md5 = 1;
-        else if (arg_match(&arg, &summaryarg, argi))
-            summary = 1;
-        else if (arg_match(&arg, &threadsarg, argi))
-            cfg.threads = arg_parse_uint(&arg);
-        else if (arg_match(&arg, &quietarg, argi))
-            quiet = 1;
-
-#if CONFIG_VP8_DECODER
-        else if (arg_match(&arg, &addnoise_level, argi))
-        {
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
-            vp8_pp_cfg.noise_level = arg_parse_uint(&arg);
-        }
-        else if (arg_match(&arg, &demacroblock_level, argi))
-        {
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK;
-            vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg);
-        }
-        else if (arg_match(&arg, &deblock, argi))
-        {
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
-        }
-        else if (arg_match(&arg, &pp_debug_info, argi))
-        {
-            unsigned int level = arg_parse_uint(&arg);
-
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag &= ~0x7;
-
-            if (level)
-                vp8_pp_cfg.post_proc_flag |= 8 << (level - 1);
-        }
-
-#endif
-        else
-            argj++;
-    }
-
-    /* Check for unrecognized options */
-    for (argi = argv; *argi; argi++)
-        if (argi[0][0] == '-' && strlen(argi[0]) > 1)
-            die("Error: Unrecognized option %s\n", *argi);
-
-    /* Handle non-option arguments */
-    fn = argv[0];
-
-    if (!fn)
-        usage_exit();
-
-    if (!prefix)
-        prefix = strdup("img");
-
-    /* Open file */
-    infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
-
-    if (!infile)
-    {
-        fprintf(stderr, "Failed to open file");
-        return EXIT_FAILURE;
-    }
-
-    if (fn2)
-        out = out_open(fn2, do_md5);
-
-    is_ivf = file_is_ivf(infile, &fourcc, &width, &height,
-                         &timebase_num, &timebase_den);
-
-    if (is_ivf)
-    {
-        if (use_y4m)
-        {
-            char buffer[128];
-            if (!fn2)
-            {
-                fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
-                return EXIT_FAILURE;
-            }
-            /*Correct for the factor of 2 applied to the timebase in the
-               encoder.*/
-            if(timebase_den&1)timebase_num<<=1;
-            else timebase_den>>=1;
-            /*Note: We can't output an aspect ratio here because IVF doesn't
-               store one, and neither does VP8.
-              That will have to wait until these tools support WebM natively.*/
-            sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
-                    "420jpeg", width, height, timebase_den, timebase_num, 'p');
-            out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
-        }
-
-        /* Try to determine the codec from the fourcc. */
-        for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
-            if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
-            {
-                vpx_codec_iface_t  *ivf_iface = ifaces[i].iface;
-
-                if (iface && iface != ivf_iface)
-                    fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
-                            ifaces[i].name);
-                else
-                    iface = ivf_iface;
-
-                break;
-            }
-    }
-    else if(use_y4m)
-    {
-        fprintf(stderr, "YUV4MPEG2 output only supported from IVF input.\n");
-        return EXIT_FAILURE;
-    }
-
-    if (vpx_codec_dec_init(&decoder, iface ? iface :  ifaces[0].iface, &cfg,
-                           postproc ? VPX_CODEC_USE_POSTPROC : 0))
-    {
-        fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
-        return EXIT_FAILURE;
-    }
-
-    if (!quiet)
-        fprintf(stderr, "%s\n", decoder.name);
-
-#if CONFIG_VP8_DECODER
-
-    if (vp8_pp_cfg.post_proc_flag
-        && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg))
-    {
-        fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
-        return EXIT_FAILURE;
-    }
-
-#endif
-
-    /* Decode file */
-    while (!read_frame(infile, &buf, &buf_sz, &buf_alloc_sz, is_ivf))
-    {
-        vpx_codec_iter_t  iter = NULL;
-        vpx_image_t    *img;
-        struct vpx_usec_timer timer;
-
-        vpx_usec_timer_start(&timer);
-
-        if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0))
-        {
-            const char *detail = vpx_codec_error_detail(&decoder);
-            fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
-
-            if (detail)
-                fprintf(stderr, "  Additional information: %s\n", detail);
-
-            goto fail;
-        }
-
-        vpx_usec_timer_mark(&timer);
-        dx_time += vpx_usec_timer_elapsed(&timer);
-
-        ++frame_in;
-
-        if (progress)
-            fprintf(stderr, "decoded frame %d.\n", frame_in);
-
-        if ((img = vpx_codec_get_frame(&decoder, &iter)))
-            ++frame_out;
-
-        if (!noblit)
-        {
-            if (img)
-            {
-                unsigned int y;
-                char out_fn[128+24];
-                uint8_t *buf;
-                const char *sfx = flipuv ? "yv12" : "i420";
-
-                if (!fn2)
-                {
-                    sprintf(out_fn, "%s-%dx%d-%04d.%s",
-                            prefix, img->d_w, img->d_h, frame_in, sfx);
-                    out = out_open(out_fn, do_md5);
-                }
-                else if(use_y4m)
-                    out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
-
-                buf = img->planes[VPX_PLANE_Y];
-
-                for (y = 0; y < img->d_h; y++)
-                {
-                    out_put(out, buf, img->d_w, do_md5);
-                    buf += img->stride[VPX_PLANE_Y];
-                }
-
-                buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U];
-
-                for (y = 0; y < (1 + img->d_h) / 2; y++)
-                {
-                    out_put(out, buf, (1 + img->d_w) / 2, do_md5);
-                    buf += img->stride[VPX_PLANE_U];
-                }
-
-                buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V];
-
-                for (y = 0; y < (1 + img->d_h) / 2; y++)
-                {
-                    out_put(out, buf, (1 + img->d_w) / 2, do_md5);
-                    buf += img->stride[VPX_PLANE_V];
-                }
-
-                if (!fn2)
-                    out_close(out, out_fn, do_md5);
-            }
-        }
-
-        if (stop_after && frame_in >= stop_after)
-            break;
-    }
-
-    if (summary)
-    {
-        fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\n",
-                frame_in, frame_out, dx_time, (float)frame_out * 1000000.0 / (float)dx_time);
-    }
-
-fail:
-
-    if (vpx_codec_destroy(&decoder))
-    {
-        fprintf(stderr, "Failed to destroy decoder: %s\n", vpx_codec_error(&decoder));
-        return EXIT_FAILURE;
-    }
-
-    if (fn2)
-        out_close(out, fn2, do_md5);
-
-    free(buf);
-    fclose(infile);
-    free(prefix);
-    free(argv);
-
-    return EXIT_SUCCESS;
-}
diff --git a/libmkv/EbmlBufferWriter.c b/libmkv/EbmlBufferWriter.c

new file mode 100644 (file)

index 0000000..d9b04a8
--- /dev/null
+++ b/libmkv/EbmlBufferWriter.c
@@ -0,0 +1,60 @@
+//#include <strmif.h>
+#include "EbmlBufferWriter.h"
+#include "EbmlWriter.h"
+//#include <cassert>
+//#include <limits>
+//#include <malloc.h>  //_alloca
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+
+void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    unsigned char *src = glob->buf;
+    src += glob->offset;
+    memcpy(src, buffer_in, len);
+    glob->offset += len;
+}
+
+static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q)
+{
+    while (q != p)
+    {
+        --q;
+
+        unsigned long cbWritten;
+        memcpy(&(glob->buf[glob->offset]), q, 1);
+        glob->offset ++;
+    }
+}
+
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    //assert(buf);
+
+    const unsigned char *const p = (const unsigned char *)(buffer_in);
+    const unsigned char *const q = p + len;
+
+    _Serialize(glob, p, q);
+}
+
+
+void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id)
+{
+    Ebml_WriteID(glob, class_id);
+    ebmlLoc->offset = glob->offset;
+    //TODO: the size field always takes 8 bytes; this may need later optimization
+    unsigned long long unknownLen =  0x01FFFFFFFFFFFFFFLLU;
+    Ebml_Serialize(glob, (void *)&unknownLen, 8); //this is a key that says length unknown
+}
+
+void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
+{
+    unsigned long long size = glob->offset - ebmlLoc->offset - 8;
+    unsigned long long curOffset = glob->offset;
+    glob->offset = ebmlLoc->offset;
+    size |=  0x0100000000000000LLU;
+    Ebml_Serialize(glob, &size, 8);
+    glob->offset = curOffset;
+}
+
diff --git a/libmkv/EbmlBufferWriter.h b/libmkv/EbmlBufferWriter.h

new file mode 100644 (file)

index 0000000..ba0a9b3
--- /dev/null
+++ b/libmkv/EbmlBufferWriter.h
@@ -0,0 +1,21 @@
+#ifndef EBMLBUFFERWRITER_HPP
+#define EBMLBUFFERWRITER_HPP
+
+typedef struct
+{
+    unsigned long long offset;
+} EbmlLoc;
+
+typedef struct
+{
+    unsigned char *buf;
+    unsigned int length;
+    unsigned int offset;
+} EbmlGlobal;
+
+
+void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id);
+void Ebml_EndSubElement(EbmlGlobal *glob,  EbmlLoc *ebmlLoc);
+
+
+#endif
diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h

new file mode 100644 (file)

index 0000000..2719908
--- /dev/null
+++ b/libmkv/EbmlIDs.h
@@ -0,0 +1,231 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#ifndef MKV_DEFS_HPP
+#define MKV_DEFS_HPP 1
+
+//Commenting out values not available in webm, but available in matroska
+
+//EBML / Matroska element IDs used by the writer, grouped by the part of the
+//container they belong to. Ebml_WriteID emits such a value as 1 to 4 bytes
+//depending on its magnitude. Entries that are commented out are Matroska
+//elements that are not available in WebM.
+enum mkv
+{
+    EBML = 0x1A45DFA3,
+    EBMLVersion = 0x4286,
+    EBMLReadVersion = 0x42F7,
+    EBMLMaxIDLength = 0x42F2,
+    EBMLMaxSizeLength = 0x42F3,
+    DocType = 0x4282,
+    DocTypeVersion = 0x4287,
+    DocTypeReadVersion = 0x4285,
+//  CRC_32 = 0xBF,
+    Void = 0xEC,
+    SignatureSlot = 0x1B538667,
+    SignatureAlgo = 0x7E8A,
+    SignatureHash = 0x7E9A,
+    SignaturePublicKey = 0x7EA5,
+    Signature = 0x7EB5,
+    SignatureElements = 0x7E5B,
+    SignatureElementList = 0x7E7B,
+    SignedElement = 0x6532,
+    //segment
+    Segment = 0x18538067,
+    //Meta Seek Information
+    SeekHead = 0x114D9B74,
+    Seek = 0x4DBB,
+    SeekID = 0x53AB,
+    SeekPosition = 0x53AC,
+    //Segment Information
+    Info = 0x1549A966,
+//  SegmentUID = 0x73A4,
+//  SegmentFilename = 0x7384,
+//  PrevUID = 0x3CB923,
+//  PrevFilename = 0x3C83AB,
+//  NextUID = 0x3EB923,
+//  NextFilename = 0x3E83BB,
+//  SegmentFamily = 0x4444,
+//  ChapterTranslate = 0x6924,
+//  ChapterTranslateEditionUID = 0x69FC,
+//  ChapterTranslateCodec = 0x69BF,
+//  ChapterTranslateID = 0x69A5,
+    TimecodeScale = 0x2AD7B1,
+    Segment_Duration = 0x4489,
+    DateUTC = 0x4461,
+//  Title = 0x7BA9,
+    MuxingApp = 0x4D80,
+    WritingApp = 0x5741,
+    //Cluster
+    Cluster = 0x1F43B675,
+    Timecode = 0xE7,
+//  SilentTracks = 0x5854,
+//  SilentTrackNumber = 0x58D7,
+//  Position = 0xA7,
+    PrevSize = 0xAB,
+    BlockGroup = 0xA0,
+    Block = 0xA1,
+//  BlockVirtual = 0xA2,
+//  BlockAdditions = 0x75A1,
+//  BlockMore = 0xA6,
+//  BlockAddID = 0xEE,
+//  BlockAdditional = 0xA5,
+    BlockDuration = 0x9B,
+//  ReferencePriority = 0xFA,
+    ReferenceBlock = 0xFB,
+//  ReferenceVirtual = 0xFD,
+//  CodecState = 0xA4,
+//  Slices = 0x8E,
+//  TimeSlice = 0xE8,
+    LaceNumber = 0xCC,
+//  FrameNumber = 0xCD,
+//  BlockAdditionID = 0xCB,
+//  MkvDelay = 0xCE,
+//  Cluster_Duration = 0xCF,
+    SimpleBlock = 0xA3,
+//  EncryptedBlock = 0xAF,
+    //Track
+    Tracks = 0x1654AE6B,
+    TrackEntry = 0xAE,
+    TrackNumber = 0xD7,
+    TrackUID = 0x73C5,
+    TrackType = 0x83,
+    FlagEnabled = 0xB9,
+    FlagDefault = 0x88,
+    FlagForced = 0x55AA,
+    FlagLacing = 0x9C,
+//  MinCache = 0x6DE7,
+//  MaxCache = 0x6DF8,
+    DefaultDuration = 0x23E383,
+//  TrackTimecodeScale = 0x23314F,
+//  TrackOffset = 0x537F,
+//  MaxBlockAdditionID = 0x55EE,
+    Name = 0x536E,
+    Language = 0x22B59C,
+    CodecID = 0x86,
+    CodecPrivate = 0x63A2,
+    CodecName = 0x258688,
+//  AttachmentLink = 0x7446,
+//  CodecSettings = 0x3A9697,
+//  CodecInfoURL = 0x3B4040,
+//  CodecDownloadURL = 0x26B240,
+//  CodecDecodeAll = 0xAA,
+//  TrackOverlay = 0x6FAB,
+//  TrackTranslate = 0x6624,
+//  TrackTranslateEditionUID = 0x66FC,
+//  TrackTranslateCodec = 0x66BF,
+//  TrackTranslateTrackID = 0x66A5,
+    //video
+    Video = 0xE0,
+    FlagInterlaced = 0x9A,
+//  StereoMode = 0x53B8,
+    PixelWidth = 0xB0,
+    PixelHeight = 0xBA,
+    PixelCropBottom = 0x54AA,
+    PixelCropTop = 0x54BB,
+    PixelCropLeft = 0x54CC,
+    PixelCropRight = 0x54DD,
+    DisplayWidth = 0x54B0,
+    DisplayHeight = 0x54BA,
+    DisplayUnit = 0x54B2,
+    AspectRatioType = 0x54B3,
+//  ColourSpace = 0x2EB524,
+//  GammaValue = 0x2FB523,
+    FrameRate = 0x2383E3,
+    //end video
+    //audio
+    Audio = 0xE1,
+    SamplingFrequency = 0xB5,
+    OutputSamplingFrequency = 0x78B5,
+    Channels = 0x9F,
+//  ChannelPositions = 0x7D7B,
+    BitDepth = 0x6264,
+    //end audio
+    //content encoding
+//  ContentEncodings = 0x6d80,
+//  ContentEncoding = 0x6240,
+//  ContentEncodingOrder = 0x5031,
+//  ContentEncodingScope = 0x5032,
+//  ContentEncodingType = 0x5033,
+//  ContentCompression = 0x5034,
+//  ContentCompAlgo = 0x4254,
+//  ContentCompSettings = 0x4255,
+//  ContentEncryption = 0x5035,
+//  ContentEncAlgo = 0x47e1,
+//  ContentEncKeyID = 0x47e2,
+//  ContentSignature = 0x47e3,
+//  ContentSigKeyID = 0x47e4,
+//  ContentSigAlgo = 0x47e5,
+//  ContentSigHashAlgo = 0x47e6,
+    //end content encoding
+    //Cueing Data
+    Cues = 0x1C53BB6B,
+    CuePoint = 0xBB,
+    CueTime = 0xB3,
+    CueTrackPositions = 0xB7,
+    CueTrack = 0xF7,
+    CueClusterPosition = 0xF1,
+    CueBlockNumber = 0x5378,
+//  CueCodecState = 0xEA,
+//  CueReference = 0xDB,
+//  CueRefTime = 0x96,
+//  CueRefCluster = 0x97,
+//  CueRefNumber = 0x535F,
+//  CueRefCodecState = 0xEB,
+    //Attachment
+//  Attachments = 0x1941A469,
+//  AttachedFile = 0x61A7,
+//  FileDescription = 0x467E,
+//  FileName = 0x466E,
+//  FileMimeType = 0x4660,
+//  FileData = 0x465C,
+//  FileUID = 0x46AE,
+//  FileReferral = 0x4675,
+    //Chapters
+//  Chapters = 0x1043A770,
+//  EditionEntry = 0x45B9,
+//  EditionUID = 0x45BC,
+//  EditionFlagHidden = 0x45BD,
+//  EditionFlagDefault = 0x45DB,
+//  EditionFlagOrdered = 0x45DD,
+//  ChapterAtom = 0xB6,
+//  ChapterUID = 0x73C4,
+//  ChapterTimeStart = 0x91,
+//  ChapterTimeEnd = 0x92,
+//  ChapterFlagHidden = 0x98,
+//  ChapterFlagEnabled = 0x4598,
+//  ChapterSegmentUID = 0x6E67,
+//  ChapterSegmentEditionUID = 0x6EBC,
+//  ChapterPhysicalEquiv = 0x63C3,
+//  ChapterTrack = 0x8F,
+//  ChapterTrackNumber = 0x89,
+//  ChapterDisplay = 0x80,
+//  ChapString = 0x85,
+//  ChapLanguage = 0x437C,
+//  ChapCountry = 0x437E,
+//  ChapProcess = 0x6944,
+//  ChapProcessCodecID = 0x6955,
+//  ChapProcessPrivate = 0x450D,
+//  ChapProcessCommand = 0x6911,
+//  ChapProcessTime = 0x6922,
+//  ChapProcessData = 0x6933,
+    //Tagging
+//  Tags = 0x1254C367,
+//  Tag = 0x7373,
+//  Targets = 0x63C0,
+//  TargetTypeValue = 0x68CA,
+//  TargetType = 0x63CA,
+//  Tagging_TrackUID = 0x63C5,
+//  Tagging_EditionUID = 0x63C9,
+//  Tagging_ChapterUID = 0x63C4,
+//  AttachmentUID = 0x63C6,
+//  SimpleTag = 0x67C8,
+//  TagName = 0x45A3,
+//  TagLanguage = 0x447A,
+//  TagDefault = 0x4484,
+//  TagString = 0x4487,
+//  TagBinary = 0x4485,
+};
+#endif
+\ No newline at end of file
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c

new file mode 100644 (file)

index 0000000..9d564c1
--- /dev/null
+++ b/libmkv/EbmlWriter.c
@@ -0,0 +1,166 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#include "EbmlWriter.h"
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+#if defined(_MSC_VER)
+#define LITERALU64(n) n
+#else
+#define LITERALU64(n) n##LLU
+#endif
+
+//Writes val as an EBML variable-width length: the value is stored in the
+//smallest number of bytes (1..8) whose 7-bits-per-byte payload can hold it,
+//with the length-marker bit set just above the payload.
+//A payload of all ones is reserved for "unknown length", so a width of n
+//bytes is only chosen while val is strictly below 2^(7n) - 1.
+void Ebml_WriteLen(EbmlGlobal *glob, long long val)
+{
+    //TODO check and make sure we are not > than 0x0100000000000000LLU
+    unsigned char size; //size in bytes to output
+    unsigned long long maxVal = LITERALU64(0x000000000000007f); //all-ones payload of the current width
+
+    for (size = 1; size < 8; size ++)
+    {
+        if ((unsigned long long)val < maxVal)
+            break;
+
+        //widen the payload by 7 bits for the next width
+        maxVal = (maxVal << 7) | 0x7f;
+    }
+
+    //marker bit: 0x80 for 1 byte, 0x4000 for 2 bytes, ... 2^56 for 8 bytes
+    val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7));
+
+    Ebml_Serialize(glob, (void *) &val, size);
+}
+
+//Writes the length of str (as reported by strlen) with Ebml_WriteLen,
+//followed by that many bytes of str.
+void Ebml_WriteString(EbmlGlobal *glob, const char *str)
+{
+    const unsigned long long byteCount = strlen(str);
+
+    //TODO: it's not clear from the spec whether the nul terminator
+    //should be serialized too.  For now we omit the null terminator.
+    Ebml_WriteLen(glob, byteCount);
+    Ebml_Write(glob, str, byteCount);
+}
+
+//Encodes code point cp as UTF-8 into out (room for 4 bytes) and returns the
+//number of bytes produced. Values that are not Unicode scalar values
+//(surrogates, anything above 0x10FFFF) are replaced by U+FFFD.
+static unsigned long Ebml_EncodeUTF8(unsigned long cp, unsigned char *out)
+{
+    if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
+        cp = 0xFFFD;
+
+    if (cp < 0x80)
+    {
+        out[0] = (unsigned char)cp;
+        return 1;
+    }
+
+    if (cp < 0x800)
+    {
+        out[0] = (unsigned char)(0xC0 | (cp >> 6));
+        out[1] = (unsigned char)(0x80 | (cp & 0x3F));
+        return 2;
+    }
+
+    if (cp < 0x10000)
+    {
+        out[0] = (unsigned char)(0xE0 | (cp >> 12));
+        out[1] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
+        out[2] = (unsigned char)(0x80 | (cp & 0x3F));
+        return 3;
+    }
+
+    out[0] = (unsigned char)(0xF0 | (cp >> 18));
+    out[1] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
+    out[2] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
+    out[3] = (unsigned char)(0x80 | (cp & 0x3F));
+    return 4;
+}
+
+//Writes wstr as a length-prefixed UTF-8 string: first the number of UTF-8
+//bytes (via Ebml_WriteLen), then the encoded bytes themselves.
+//The previous code wrote wcslen(wstr) bytes of the raw wchar_t array, which
+//is neither UTF-8 nor the complete string.
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr)
+{
+    unsigned char utf8[4];
+    unsigned long long size = 0;
+    int pass;
+
+    //TODO: it's not clear from the spec whether the nul terminator
+    //should be serialized too.  For now we omit it, as Ebml_WriteString does.
+
+    //pass 0 measures the encoded payload, pass 1 writes length and payload
+    for (pass = 0; pass < 2; pass++)
+    {
+        const wchar_t *w = wstr;
+
+        if (pass == 1)
+            Ebml_WriteLen(glob, size);
+
+        while (*w)
+        {
+            unsigned long cp = (unsigned long) * w++;
+            unsigned long n;
+
+            //combine a UTF-16 surrogate pair (platforms with 16 bit wchar_t)
+            if (cp >= 0xD800 && cp <= 0xDBFF && *w >= 0xDC00 && *w <= 0xDFFF)
+                cp = 0x10000 + ((cp - 0xD800) << 10) + ((unsigned long) * w++ - 0xDC00);
+
+            n = Ebml_EncodeUTF8(cp, utf8);
+
+            if (pass == 0)
+                size += n;
+            else
+                Ebml_Write(glob, utf8, n);
+        }
+    }
+}
+
+//Writes an element ID using as many bytes (1 to 4) as the magnitude of
+//class_id requires.
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
+{
+    unsigned long idBytes = 1;
+
+    if (class_id >= 0x01000000)
+        idBytes = 4;
+    else if (class_id >= 0x00010000)
+        idBytes = 3;
+    else if (class_id >= 0x00000100)
+        idBytes = 2;
+
+    Ebml_Serialize(glob, &class_id, idBytes);
+}
+//Writes an unsigned element with a fixed 8 byte payload: the ID, a one byte
+//length field (0x80 | 8) and then the 8 bytes of ui.
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
+{
+    unsigned char lengthByte = 0x88;
+
+    Ebml_WriteID(glob, class_id);
+    Ebml_Serialize(glob, &lengthByte, 1);
+    Ebml_Serialize(glob, &ui, 8);
+}
+
+//Writes an unsigned integer element: the ID, a one byte length field
+//(0x80 | payload size) and then the payload bytes of ui.
+//The payload size is 1..4 bytes for values that fit in 32 bits (chosen with
+//the same 7 bit thresholds as before, so existing output is unchanged) and
+//grows up to sizeof(unsigned long) for larger values, which used to be
+//silently truncated to their low 4 bytes.
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui)
+{
+    unsigned char size; //size in bytes to output
+    unsigned char sizeSerialized;
+    unsigned long minVal = 0x7fLU; //threshold to compare for byte size
+
+    Ebml_WriteID(glob, class_id);
+
+    for (size = 1; size < 4; size ++)
+    {
+        if (ui < minVal)
+        {
+            break;
+        }
+
+        minVal <<= 7;
+    }
+
+    //only relevant where unsigned long is wider than 32 bits: keep adding
+    //bytes while significant bits would otherwise be dropped
+    while (size < sizeof(ui) && (ui >> (8 * size)) != 0)
+    {
+        size ++;
+    }
+
+    sizeSerialized = 0x80 | size;
+    Ebml_Serialize(glob, &sizeSerialized, 1);
+    Ebml_Serialize(glob, &ui, size);
+}
+//TODO: perhaps this is a poor name for this id serializer helper function
+//TODO: perhaps this is a poor name for this id serializer helper function
+//Writes an element whose payload is itself an ID-style value: the ID
+//class_id, the payload length, then bin written with Ebml_WriteID.
+//The length is derived with the same thresholds Ebml_WriteID uses, so the
+//announced length always matches the number of bytes Ebml_WriteID emits.
+//(The old test shifted the int constant 0xff left by 24, which overflows a
+//signed int.)
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin)
+{
+    int size;
+
+    for (size = 4; size > 1; size--)
+    {
+        //true once bin has a bit set at or above byte (size - 1)
+        if (bin >> ((size - 1) * 8))
+            break;
+    }
+
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteLen(glob, size);
+    Ebml_WriteID(glob, bin);
+}
+
+//Writes a float element: the ID, the one byte length field 0x88 (8 payload
+//bytes) and then the 8 bytes of the double d.
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d)
+{
+    unsigned char lengthByte = 0x88;
+
+    Ebml_WriteID(glob, class_id);
+    Ebml_Serialize(glob, &lengthByte, 1);
+    Ebml_Serialize(glob, &d, 8);
+}
+
+//Masks val to its low 22 bits, sets bit 21, shifts the result left by 8 and
+//hands the first 3 bytes of the resulting long to Ebml_Serialize.
+//NOTE(review): the intended on-disk format is not evident from this code;
+//the only call visible in this change (in writeSimpleBlock) is commented
+//out with the remark "this is 3 bytes" - confirm before using.
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val)
+{
+    signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
+    Ebml_Serialize(glob, &out, 3);
+}
+
+//Writes a string element: the ID class_id followed by s as emitted by
+//Ebml_WriteString (length, then the characters).
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s)
+{
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteString(glob, s);
+}
+
+//Writes a wide string element: the ID class_id followed by s as emitted by
+//Ebml_WriteUTF8.
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s)
+{
+    Ebml_WriteID(glob,  class_id);
+    Ebml_WriteUTF8(glob,  s);
+}
+
+//Writes a binary element: the ID class_id, data_length as an EBML length,
+//then data_length bytes of data copied with Ebml_Write.
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length)
+{
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteLen(glob, data_length);
+    Ebml_Write(glob,  data, data_length);
+}
+
+//Writes a Void element (ID 0xEC): the ID, vSize as an EBML length, then
+//vSize zero bytes written one at a time.
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize)
+{
+    unsigned char zero = 0;
+    unsigned long remaining;
+
+    Ebml_WriteID(glob, 0xEC);
+    Ebml_WriteLen(glob, vSize);
+
+    for (remaining = vSize; remaining > 0; remaining--)
+        Ebml_Write(glob, &zero, 1);
+}
+
+//TODO Serialize Date
diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h

new file mode 100644 (file)

index 0000000..8c7fe7c
--- /dev/null
+++ b/libmkv/EbmlWriter.h
@@ -0,0 +1,38 @@
+#ifndef EBMLWRITER_HPP
+#define EBMLWRITER_HPP
+
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+//note: you must define write and serialize functions as well as your own EBML_GLOBAL
+//These functions MUST be implemented
+#include <stddef.h>
+#include "vpx/vpx_integer.h"
+
+typedef struct EbmlGlobal EbmlGlobal;
+void  Ebml_Serialize(EbmlGlobal *glob, const void *, unsigned long);
+void  Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
+/////
+
+
+void Ebml_WriteLen(EbmlGlobal *glob, long long val);
+void Ebml_WriteString(EbmlGlobal *glob, const char *str);
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui);
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
+//TODO make this more generic to signed
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
+//TODO need date function
+#endif
diff --git a/libmkv/Makefile b/libmkv/Makefile

new file mode 100644 (file)

index 0000000..b53377b
--- /dev/null
+++ b/libmkv/Makefile
@@ -0,0 +1,25 @@
+#Variables
+CC=gcc
+LINKER=gcc
+FLAGS=
+
+
+#Build Targets
+EbmlWriter.o: EbmlWriter.c EbmlWriter.h
+       $(CC) $(FLAGS) -c EbmlWriter.c
+
+EbmlBufferWriter.o: EbmlBufferWriter.c EbmlBufferWriter.h
+       $(CC) $(FLAGS) -c EbmlBufferWriter.c
+
+#The element writer lives in WebMElement.c; there is no MkvElement.c in this directory
+WebMElement.o: WebMElement.c WebMElement.h
+       $(CC) $(FLAGS) -c WebMElement.c
+
+testlibmkv.o: testlibmkv.c
+       $(CC) $(FLAGS) -c testlibmkv.c
+
+testlibmkv: testlibmkv.o WebMElement.o EbmlBufferWriter.o EbmlWriter.o
+       $(LINKER) $(FLAGS) testlibmkv.o WebMElement.o EbmlBufferWriter.o EbmlWriter.o -o testlibmkv
+
+clean:
+       rm -rf *.o testlibmkv
+       
+\ No newline at end of file
diff --git a/libmkv/WebMElement.c b/libmkv/WebMElement.c

new file mode 100644 (file)

index 0000000..25a9024
--- /dev/null
+++ b/libmkv/WebMElement.c
@@ -0,0 +1,220 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#include "EbmlBufferWriter.h"
+#include "EbmlIDs.h"
+#include "WebMElement.h"
+#include <stdio.h>
+
+#define kVorbisPrivateMaxSize  4000
+
+//Writes the EBML header element: version fields, ID/size length limits and
+//the "webm" doc type with doc type version and read version 2.
+void writeHeader(EbmlGlobal *glob)
+{
+    EbmlLoc headerLoc;
+
+    Ebml_StartSubElement(glob, &headerLoc, EBML);
+
+    Ebml_SerializeUnsigned(glob, EBMLVersion, 1);        //EBML Version
+    Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1);    //EBML Read Version
+    Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4);    //EBML Max ID Length
+    Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8);  //EBML Max Size Length
+    Ebml_SerializeString(glob, DocType, "webm");         //Doc Type
+    Ebml_SerializeUnsigned(glob, DocTypeVersion, 2);     //Doc Type Version
+    Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version
+
+    Ebml_EndSubElement(glob, &headerLoc);
+}
+
+//Writes one SimpleBlock element: the ID, a 4 byte size field covering the
+//4 byte block header plus dataLength, the track number byte, the 2 byte
+//timecode, the flags byte and finally the frame data.
+void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCode,
+                      int isKeyframe, unsigned char lacingFlag, int discardable,
+                      unsigned char *data, unsigned long dataLength)
+{
+    //0x10000000 tags the value as a 4 byte length
+    //TODO check length < 0x0FFFFFFF
+    unsigned long sizeField = (4 + dataLength) | 0x10000000;
+    //TODO check track number < 128
+    unsigned char trackByte = trackNumber | 0x80;
+    unsigned char flags = (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable;
+
+    Ebml_WriteID(glob, SimpleBlock);
+    Ebml_Serialize(glob, &sizeField, 4);
+    Ebml_Write(glob, &trackByte, 1);
+    //Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes
+    Ebml_Serialize(glob, &timeCode, 2);
+    Ebml_Write(glob, &flags, 1);
+    Ebml_Write(glob, data, dataLength);
+}
+
+//Builds a track UID by XOR-ing (current time * trackNumber) with a value
+//assembled from two rand() results, the first one shifted into the upper
+//32 bits.
+static UInt64 generateTrackID(unsigned int trackNumber)
+{
+    UInt64 stamp = time(NULL) * trackNumber;
+    UInt64 noise = rand();
+
+    noise = (noise << 32) + rand();
+    return stamp ^ noise;
+}
+
+//Writes a TrackEntry for a video track: track number, a generated track
+//UID, the fixed codec name "VP8", track type 1, the codec ID and a nested
+//Video element carrying pixel width, pixel height and frame rate.
+//flagLacing is accepted but not written by this function.
+void writeVideoTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
+                     double frameRate)
+{
+    EbmlLoc trackLoc;
+    EbmlLoc videoLoc;
+    UInt64 trackUid;
+
+    Ebml_StartSubElement(glob, &trackLoc, TrackEntry);
+    Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+    trackUid = generateTrackID(trackNumber);
+    Ebml_SerializeUnsigned(glob, TrackUID, trackUid);
+    Ebml_SerializeString(glob, CodecName, "VP8");  //TODO shouldn't be fixed
+    Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
+    Ebml_SerializeString(glob, CodecID, codecId);
+
+    //nested Video element
+    Ebml_StartSubElement(glob, &videoLoc, Video);
+    Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
+    Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+    Ebml_SerializeFloat(glob, FrameRate, frameRate);
+    Ebml_EndSubElement(glob, &videoLoc); //Video
+
+    Ebml_EndSubElement(glob, &trackLoc); //Track Entry
+}
+//Writes a TrackEntry for an audio track: track number, a generated track
+//UID, track type 2, the codec ID, the codec private data, the fixed codec
+//name "VORBIS" and a nested Audio element carrying sampling frequency and
+//channel count.
+//flagLacing is accepted but not written by this function.
+void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, double samplingFrequency, unsigned int channels,
+                     unsigned char *private, unsigned long privateSize)
+{
+    EbmlLoc trackLoc;
+    EbmlLoc audioLoc;
+    UInt64 trackUid;
+
+    Ebml_StartSubElement(glob, &trackLoc, TrackEntry);
+    Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+    trackUid = generateTrackID(trackNumber);
+    Ebml_SerializeUnsigned(glob, TrackUID, trackUid);
+    Ebml_SerializeUnsigned(glob, TrackType, 2); //audio is always 2
+    //The defaults are relied on for these required fields:
+    /*  Ebml_SerializeUnsigned(glob, FlagEnabled, 1);
+        Ebml_SerializeUnsigned(glob, FlagDefault, 1);
+        Ebml_SerializeUnsigned(glob, FlagForced, 1);
+        Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/
+    Ebml_SerializeString(glob, CodecID, codecId);
+    Ebml_SerializeData(glob, CodecPrivate, private, privateSize);
+    Ebml_SerializeString(glob, CodecName, "VORBIS");  //fixed for now
+
+    //nested Audio element
+    Ebml_StartSubElement(glob, &audioLoc, Audio);
+    Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency);
+    Ebml_SerializeUnsigned(glob, Channels, channels);
+    Ebml_EndSubElement(glob, &audioLoc);
+
+    Ebml_EndSubElement(glob, &trackLoc);
+}
+//Writes the segment Info element: timecode scale, duration (the argument
+//multiplied by 1000.0) and the fixed muxing / writing application names.
+//startInfo receives the location of the element's size field.
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo, unsigned long timeCodeScale, double duration)
+{
+    const double scaledDuration = duration * 1000.0; //Currently fixed to using milliseconds
+
+    Ebml_StartSubElement(ebml, startInfo, Info);
+    Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale);
+    Ebml_SerializeFloat(ebml, Segment_Duration, scaledDuration);
+    Ebml_SerializeString(ebml, MuxingApp, "QTmuxingAppLibWebM-0.0.1");   //0x4D80
+    Ebml_SerializeString(ebml, WritingApp, "QTwritingAppLibWebM-0.0.1"); //0x5741
+    Ebml_EndSubElement(ebml, startInfo);
+}
+
+/*
+void Mkv_InitializeSegment(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x18538067);
+}
+
+void Mkv_InitializeSeek(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x114d9b74);
+}
+void Mkv_WriteSeekInformation(Ebml& ebml_out, SeekStruct& seekInformation)
+{
+    EbmlLoc ebmlLoc;
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x4dbb);
+    Ebml_SerializeString(ebml_out, 0x53ab, seekInformation.SeekID);
+    Ebml_SerializeUnsigned(ebml_out, 0x53ac, seekInformation.SeekPosition);
+    Ebml_EndSubElement(ebml_out, ebmlLoc);
+}
+
+void Mkv_WriteSegmentInformation(Ebml& ebml_out, SegmentInformationStruct& segmentInformation)
+{
+    Ebml_SerializeUnsigned(ebml_out, 0x73a4, segmentInformation.segmentUID);
+    if (segmentInformation.filename != 0)
+        Ebml_SerializeString(ebml_out, 0x7384, segmentInformation.filename);
+    Ebml_SerializeUnsigned(ebml_out, 0x2AD7B1, segmentInformation.TimecodeScale);
+    Ebml_SerializeUnsigned(ebml_out, 0x4489, segmentInformation.Duration);
+    //TODO date
+    Ebml_SerializeWString(ebml_out, 0x4D80, L"MKVMUX");
+    Ebml_SerializeWString(ebml_out, 0x5741, segmentInformation.WritingApp);
+}
+
+void Mkv_InitializeTrack(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1654AE6B);
+}
+
+static void Mkv_WriteGenericTrackData(Ebml& ebml_out, TrackStruct& track)
+{
+    Ebml_SerializeUnsigned(ebml_out, 0xD7, track.TrackNumber);
+    Ebml_SerializeUnsigned(ebml_out, 0x73C5, track.TrackUID);
+    Ebml_SerializeUnsigned(ebml_out, 0x83, track.TrackType);
+    Ebml_SerializeUnsigned(ebml_out, 0xB9, track.FlagEnabled ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0x88, track.FlagDefault ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0x55AA, track.FlagForced ? 1 :0);
+    if (track.Language != 0)
+        Ebml_SerializeString(ebml_out, 0x22B59C, track.Language);
+    if (track.CodecID != 0)
+        Ebml_SerializeString(ebml_out, 0x86, track.CodecID);
+    if (track.CodecPrivate != 0)
+        Ebml_SerializeData(ebml_out, 0x63A2, track.CodecPrivate, track.CodecPrivateLength);
+    if (track.CodecName != 0)
+        Ebml_SerializeWString(ebml_out, 0x258688, track.CodecName);
+}
+
+void Mkv_WriteVideoTrack(Ebml& ebml_out, TrackStruct & track, VideoTrackStruct& video)
+{
+    EbmlLoc trackHeadLoc, videoHeadLoc;
+    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);  //start Track
+    Mkv_WriteGenericTrackData(ebml_out, track);
+    Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0);  //start Video
+    Ebml_SerializeUnsigned(ebml_out, 0x9A, video.FlagInterlaced ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0xB0, video.PixelWidth);
+    Ebml_SerializeUnsigned(ebml_out, 0xBA, video.PixelHeight);
+    Ebml_SerializeUnsigned(ebml_out, 0x54B0, video.PixelDisplayWidth);
+    Ebml_SerializeUnsigned(ebml_out, 0x54BA, video.PixelDisplayHeight);
+    Ebml_SerializeUnsigned(ebml_out, 0x54B2, video.displayUnit);
+    Ebml_SerializeFloat(ebml_out, 0x2383E3, video.FrameRate);
+    Ebml_EndSubElement(ebml_out, videoHeadLoc);
+    Ebml_EndSubElement(ebml_out, trackHeadLoc);
+
+}
+
+void Mkv_WriteAudioTrack(Ebml& ebml_out, TrackStruct & track, AudioTrackStruct& video)
+{
+    EbmlLoc trackHeadLoc, audioHeadLoc;
+    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);
+    Mkv_WriteGenericTrackData(ebml_out, track);
+    Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0);  //start Audio
+    Ebml_SerializeFloat(ebml_out, 0xB5, video.SamplingFrequency);
+    Ebml_SerializeUnsigned(ebml_out, 0x9F, video.Channels);
+    Ebml_SerializeUnsigned(ebml_out, 0x6264, video.BitDepth);
+    Ebml_EndSubElement(ebml_out, audioHeadLoc); // end audio
+    Ebml_EndSubElement(ebml_out, trackHeadLoc);
+}
+
+void Mkv_WriteEbmlClusterHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, ClusterHeadStruct & clusterHead)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1F43B675);
+    Ebml_SerializeUnsigned(ebml_out, 0x6264, clusterHead.TimeCode);
+}
+
+void Mkv_WriteSimpleBlockHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, SimpleBlockStruct& block)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0xA3);
+    Ebml_Write1UInt(ebml_out, block.TrackNumber);
+    Ebml_WriteSigned16(ebml_out,block.TimeCode);
+    unsigned char flags = 0x00 | (block.iskey ? 0x80:0x00) | (block.lacing << 1) | block.discardable;
+    Ebml_Write1UInt(ebml_out, flags);  //TODO this may be the wrong function
+    Ebml_Serialize(ebml_out, block.data, block.dataLength);
+    Ebml_EndSubElement(ebml_out,ebmlLoc);
+}
+*/
diff --git a/libmkv/WebMElement.h b/libmkv/WebMElement.h

new file mode 100644 (file)

index 0000000..b4208f2
--- /dev/null
+++ b/libmkv/WebMElement.h
@@ -0,0 +1,35 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#ifndef MKV_CONTEXT_HPP
+#define MKV_CONTEXT_HPP 1
+
+// NOTE(review): this name is spelled "writeSimpleBock" and takes an unsigned
+// short timeCode, whereas WebMElement.c defines writeSimpleBlock with a short
+// timeCode (declared further down in this header). No definition of
+// writeSimpleBock is visible in this change; it looks like a leftover typo -
+// confirm and remove.
+void writeSimpleBock(EbmlGlobal *ebml, unsigned char trackNumber, unsigned short timeCode,
+                     int isKeyframe, unsigned char lacingFlag, int  discardable,
+                     unsigned char *data, unsigned long dataLength);
+
+
+// these are helper functions
+void writeHeader(EbmlGlobal *ebml);
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo , unsigned long timeCodeScale, double duration);
+//this function is a helper only, it assumes a lot of defaults
+void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber, int flagLacing,
+                     char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
+                     double frameRate);
+void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, double samplingFrequency, unsigned int channels,
+                     unsigned char *private, unsigned long privateSize);
+
+void writeSimpleBlock(EbmlGlobal *ebml, unsigned char trackNumber, short timeCode,
+                      int isKeyframe, unsigned char lacingFlag, int discardable,
+                      unsigned char *data, unsigned long dataLength);
+
+
+
+#endif
+\ No newline at end of file
diff --git a/libmkv/testlibmkv.c b/libmkv/testlibmkv.c

new file mode 100644 (file)

index 0000000..7edfc43
--- /dev/null
+++ b/libmkv/testlibmkv.c
@@ -0,0 +1,63 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+
+#include "EbmlIDs.h"
+#include "EbmlBufferWriter.h"
+#include "WebMElement.h"
+
+#include <stdio.h>
+//Builds a tiny WebM stream in memory (EBML header, one Segment holding an
+//Info element, one video track and one Cluster with a single 4 byte
+//SimpleBlock) and dumps it to "test.mkv".
+//Returns 0 on success and 1 when the output file cannot be opened or
+//written completely.
+int main(int argc, char *argv[])
+{
+    //init the datatype we're using for ebml output
+    unsigned char data[8192];
+    EbmlGlobal ebml;
+    ebml.buf = data;
+    ebml.offset = 0;
+    ebml.length = 8192;
+
+    (void)argc;
+    (void)argv;
+
+    writeHeader(&ebml);
+    {
+        EbmlLoc startSegment;
+        Ebml_StartSubElement(&ebml, &startSegment, Segment); //segment
+        {
+            //segment info
+            EbmlLoc startInfo;
+            Ebml_StartSubElement(&ebml, &startInfo, Info);
+            Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv");
+            Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv");
+            Ebml_EndSubElement(&ebml, &startInfo);
+        }
+
+        {
+            EbmlLoc trackStart;
+            Ebml_StartSubElement(&ebml, &trackStart, Tracks);
+            writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97);
+            //writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0);
+            Ebml_EndSubElement(&ebml, &trackStart);
+        }
+
+        {
+            EbmlLoc clusterStart;
+            Ebml_StartSubElement(&ebml, &clusterStart, Cluster); //cluster
+            Ebml_SerializeUnsigned(&ebml, Timecode, 0);
+
+            unsigned char someData[4] = {1, 2, 3, 4};
+            writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4);
+            Ebml_EndSubElement(&ebml, &clusterStart);
+        }    //end cluster
+        Ebml_EndSubElement(&ebml, &startSegment);
+    }
+
+    //dump ebml stuff to the file
+    FILE *file_out = fopen("test.mkv", "wb");
+
+    if (!file_out)
+    {
+        perror("test.mkv");
+        return 1;
+    }
+
+    size_t bytesWritten = fwrite(data, 1, ebml.offset, file_out);
+    //fclose flushes, so its result matters for a file we just wrote
+    int closeFailed = fclose(file_out);
+
+    if (bytesWritten != ebml.offset || closeFailed)
+    {
+        fprintf(stderr, "test.mkv: short write\n");
+        return 1;
+    }
+
+    return 0;
+}
+\ No newline at end of file
diff --git a/libs.mk b/libs.mk

index 4beaa50cbd9ed592311f9f0d43e7fdb34b8ee1d8..9ded3945a71074dab21c1b52cdea9a6ea28b4869 100644 (file)
--- a/libs.mk
+++ b/libs.mk
@@ -93,6 +93,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
  CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
  CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
  endif
+CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
  CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
  CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
  CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
diff --git a/nestegg/.gitignore b/nestegg/.gitignore

new file mode 100644 (file)

index 0000000..b2ba99c
--- /dev/null
+++ b/nestegg/.gitignore
@@ -0,0 +1,40 @@
+*.lo
+*.o
+*.swp
+*~
+.deps
+.dirstamp
+.libs
+Makefile
+Makefile.in
+_stdint.h
+aclocal.m4
+autom4te.cache
+compile
+config.guess
+config.h
+config.h.in
+config.log
+config.status
+config.sub
+configure
+depcomp
+docs/Doxyfile
+docs/doxygen-build.stamp
+docs/html
+install-sh
+libtool
+ltmain.sh
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+missing
+nestegg-uninstalled.pc
+nestegg.pc
+src/.dirstamp
+src/libnestegg.la
+stamp-h1
+test/test
+include/nestegg/nestegg-stdint.h
diff --git a/nestegg/AUTHORS b/nestegg/AUTHORS

new file mode 100644 (file)

index 0000000..8204f40
--- /dev/null
+++ b/nestegg/AUTHORS
@@ -0,0 +1 @@
+Matthew Gregan <kinetik@flim.org>
diff --git a/nestegg/INSTALL b/nestegg/INSTALL

new file mode 100644 (file)

index 0000000..401df41
--- /dev/null
+++ b/nestegg/INSTALL
@@ -0,0 +1,8 @@
+Build instructions for libnestegg
+=================================
+
+0. Change directory into the source directory.
+1. Run |autoreconf --install| to generate configure.
+2. Run |./configure| to configure the build.
+3. Run |make| to build.
+4. Run |make check| to run the test suite.
diff --git a/nestegg/LICENSE b/nestegg/LICENSE

new file mode 100644 (file)

index 0000000..a67984a
--- /dev/null
+++ b/nestegg/LICENSE
@@ -0,0 +1,13 @@
+Copyright © 2010 Mozilla Foundation
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/nestegg/Makefile.am b/nestegg/Makefile.am

new file mode 100644 (file)

index 0000000..5006991
--- /dev/null
+++ b/nestegg/Makefile.am
@@ -0,0 +1,51 @@
+AUTOMAKE_OPTIONS = foreign 1.11 no-dist-gzip dist-bzip2 subdir-objects
+ACLOCAL_AMFLAGS = -I m4
+
+INCLUDES = -I$(top_srcdir)/include -I. -I$(top_srcdir)/halloc
+AM_CFLAGS = -ansi -pedantic -Wall -Wextra -Wno-long-long -O0 -g
+
+SUBDIRS = docs
+
+EXTRA_DIST = \
+       AUTHORS README LICENSE \
+       nestegg-uninstalled.pc.in \
+       m4/as-ac-expand.m4 \
+       m4/pkg.m4 \
+       m4/ax_create_stdint_h.m4 \
+       halloc/src/halloc.c \
+       halloc/halloc.h \
+       halloc/src/align.h \
+       halloc/src/hlist.h \
+       halloc/src/macros.h
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = nestegg.pc
+
+nesteggincludedir = $(includedir)/nestegg
+nestegginclude_HEADERS = include/nestegg/nestegg.h include/nestegg/nestegg-stdint.h
+
+lib_LTLIBRARIES = src/libnestegg.la
+
+src_libnestegg_la_SOURCES = \
+       src/nestegg.c \
+       halloc/src/halloc.c \
+       halloc/halloc.h \
+       halloc/src/align.h \
+       halloc/src/hlist.h \
+       halloc/src/macros.h
+
+check_PROGRAMS = test/test
+
+test_test_SOURCES = test/test.c
+test_test_LDADD = src/libnestegg.la
+
+DISTCLEANFILES = include/nestegg/nestegg-stdint.h
+
+dist-hook:
+       find $(distdir) -type d -name '.git' | xargs rm -rf
+
+debug:
+       $(MAKE) all CFLAGS="@DEBUG@"
+
+profile:
+       $(MAKE) all CFLAGS="@PROFILE@"
diff --git a/nestegg/README b/nestegg/README

new file mode 100644 (file)

index 0000000..47c8237
--- /dev/null
+++ b/nestegg/README
@@ -0,0 +1,6 @@
+See INSTALL for build instructions.
+
+Licensed under an ISC-style license.  See LICENSE for details.
+
+The source under the halloc/ directory is licensed under a BSD license.  See
+halloc/halloc.h for details.
diff --git a/nestegg/TODO b/nestegg/TODO

new file mode 100644 (file)

index 0000000..bf0cb04
--- /dev/null
+++ b/nestegg/TODO
@@ -0,0 +1,21 @@
+- Document when read, seek, tell callbacks are used.
+- Add an automated testsuite.
+- Test (and fix, if necessary) support for unknown sizes.
+- Test (and fix, if necessary) support for large files.
+- Read past unknown elements rather than seeking.
+- Try to handle unknown elements with unknown sizes.
+- Formalize handling of default element values.
+- Try to resynchronize stream when read_block fails so that failure to parse
+  a single block can be treated as non-fatal.
+- Make logging more useful to API users.
+- Avoid reparsing Cues and ignore any SeekHead at end of file.
+- Optionally build a Cue index as Clusters are parsed.
+- Support seeking without Cues.
+- Avoid building a list of Clusters as they are parsed and retain only the
+  last one parsed.
+- Add an asynchronous error code to struct nestegg and ensure that API calls
+  continue to fail safely once a fatal error has been returned.
+- Modify parser/data structures to provide a clean separation.  Perhaps the
+  parser should return a generic tree of nodes that a second pass uses to
+  initialize the main data structures.
+- Use pool allocator for all allocations.
diff --git a/nestegg/configure.ac b/nestegg/configure.ac

new file mode 100644 (file)

index 0000000..70f6e0d
--- /dev/null
+++ b/nestegg/configure.ac
@@ -0,0 +1,124 @@
+dnl ------------------------------------------------
+dnl Initialization and Versioning
+dnl ------------------------------------------------
+
+AC_INIT(libnestegg,[0.1git])
+
+AC_CANONICAL_HOST
+AC_CANONICAL_TARGET
+
+AC_CONFIG_MACRO_DIR([m4])
+
+AM_CONFIG_HEADER([config.h])
+AC_CONFIG_SRCDIR([src/nestegg.c])
+AM_INIT_AUTOMAKE
+
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+dnl Library versioning
+dnl CURRENT, REVISION, AGE
+dnl - library source changed -> increment REVISION
+dnl - interfaces added/removed/changed -> increment CURRENT, REVISION = 0
+dnl - interfaces added -> increment AGE
+dnl - interfaces removed -> AGE = 0
+
+NESTEGG_CURRENT=0
+NESTEGG_REVISION=0
+NESTEGG_AGE=1
+AC_SUBST(NESTEGG_CURRENT)
+AC_SUBST(NESTEGG_REVISION)
+AC_SUBST(NESTEGG_AGE)
+
+
+dnl --------------------------------------------------  
+dnl Check for programs
+dnl --------------------------------------------------  
+
+dnl save $CFLAGS since AC_PROG_CC likes to insert "-g -O2"
+dnl if $CFLAGS is blank
+cflags_save="$CFLAGS"
+AC_PROG_CC
+AC_PROG_CPP
+CFLAGS="$cflags_save"
+
+AM_PROG_CC_C_O
+AC_LIBTOOL_WIN32_DLL
+AM_PROG_LIBTOOL
+
+dnl Check for doxygen
+AC_ARG_ENABLE([doc],
+       AS_HELP_STRING([--enable-doc], [Build API documentation]),
+       [ac_enable_doc=$enableval], [ac_enable_doc=auto])
+
+if test "x$ac_enable_doc" != "xno"; then
+       AC_CHECK_PROG(HAVE_DOXYGEN, doxygen, true, false)
+
+       if test "x$HAVE_DOXYGEN" = "xfalse" -a "x$ac_enable_doc" = "xyes"; then
+               AC_MSG_ERROR([*** API documentation explicitly requested but Doxygen not found])
+       fi
+else
+       HAVE_DOXYGEN=false
+fi
+AM_CONDITIONAL(HAVE_DOXYGEN,$HAVE_DOXYGEN)
+if test $HAVE_DOXYGEN = "false"; then
+        AC_MSG_WARN([*** doxygen not found, API documentation will not be built])
+fi
+
+# Generate portable stdint.h replacement
+AX_CREATE_STDINT_H(include/nestegg/nestegg-stdint.h)
+
+# Test whether ld supports -version-script
+AC_PROG_LD
+AC_PROG_LD_GNU
+AC_MSG_CHECKING([how to control symbol export])
+
+dnl --------------------------------------------------
+dnl Do substitutions
+dnl --------------------------------------------------
+
+AC_SUBST(DEBUG)
+AC_SUBST(PROFILE)
+
+AC_OUTPUT([
+  Makefile 
+  docs/Makefile
+  docs/Doxyfile
+  nestegg.pc
+  nestegg-uninstalled.pc
+])
+
+AS_AC_EXPAND(LIBDIR, ${libdir})
+AS_AC_EXPAND(INCLUDEDIR, ${includedir})
+AS_AC_EXPAND(BINDIR, ${bindir})
+AS_AC_EXPAND(DOCDIR, ${docdir})
+
+if test $HAVE_DOXYGEN = "false"; then
+  doc_build="no"
+else
+  doc_build="yes"
+fi
+
+AC_MSG_RESULT([
+------------------------------------------------------------------------
+  $PACKAGE $VERSION:  Automatic configuration OK.
+
+  General configuration:
+
+    API Documentation: .......... ${doc_build}
+
+  Installation paths:
+
+    libnestegg: .................. ${LIBDIR}
+    C header files: .............. ${INCLUDEDIR}/nestegg
+    Documentation: ............... ${DOCDIR}
+
+  Building:
+
+    Type 'make' to compile $PACKAGE.
+
+    Type 'make install' to install $PACKAGE.
+
+  Example programs will be built but not installed.
+------------------------------------------------------------------------
+])
+
diff --git a/nestegg/docs/Doxyfile.in b/nestegg/docs/Doxyfile.in

new file mode 100644 (file)

index 0000000..e0e9249
--- /dev/null
+++ b/nestegg/docs/Doxyfile.in
@@ -0,0 +1,1551 @@
+# Doxyfile 1.6.2
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = @PACKAGE@
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         = @VERSION@
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = .
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before file names in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = YES
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it parses.
+# With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this tag.
+# The format is ext=language, where ext is a file extension, and language is one of
+# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
+# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = NO
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
+# doxygen. The layout file controls the global structure of the generated output files
+# in an output format independent way. To create the layout file that represents
+# doxygen's defaults, run doxygen with the -l option. You can optionally specify a
+# file name after the option, if omitted DoxygenLayout.xml will be used as the name
+# of the layout file.
+
+LAYOUT_FILE            =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = YES
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = @top_srcdir@/include/nestegg
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP         = NO
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
+# are set, an additional index file will be generated that can be used as input for
+# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
+# HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
+# For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+#  will be generated, which together with the HTML files, form an Eclipse help
+#  plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before the help appears.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be implemented using a PHP enabled web server instead of at the web client using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server based approach is that it scales better to large projects and allows full text search. The disadvantage is that it is more difficult to set up
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME           = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct group dependencies.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/nestegg/docs/Makefile.am b/nestegg/docs/Makefile.am

new file mode 100644 (file)

index 0000000..42cf8ee
--- /dev/null
+++ b/nestegg/docs/Makefile.am
@@ -0,0 +1,38 @@
+doc_DATA = doxygen-build.stamp
+
+EXTRA_DIST = Doxyfile.in
+
+if HAVE_DOXYGEN
+doxygen-build.stamp: Doxyfile
+       doxygen
+       touch doxygen-build.stamp
+else
+doxygen-build.stamp:
+       echo "*** Warning: Doxygen not found; documentation will not be built."
+       touch doxygen-build.stamp
+endif
+
+dist_docdir = $(distdir)/libnestegg
+
+# Copy the already-built HTML documentation into the distribution directory.
+dist-hook:
+       if test -d html; then \
+         $(mkinstalldirs) $(dist_docdir); \
+         echo -n "copying built documentation..."; \
+         cp -rp html $(dist_docdir)/html; \
+         echo "OK"; \
+       fi
+
+install-data-local: doxygen-build.stamp
+       $(mkinstalldirs) $(DESTDIR)$(docdir)
+       if test -d html; then \
+         cp -rp html $(DESTDIR)$(docdir)/html; \
+       fi
+
+uninstall-local:
+       rm -rf $(DESTDIR)$(docdir)
+
+clean-local:
+       if test -d html; then rm -rf html; fi
+       if test -f doxygen-build.stamp; then rm -f doxygen-build.stamp; fi
+
diff --git a/nestegg/halloc/README b/nestegg/halloc/README

new file mode 100644 (file)

index 0000000..380fba2
--- /dev/null
+++ b/nestegg/halloc/README
@@ -0,0 +1,45 @@
+halloc 1.2.1
+============
+      
+       Hierarchical memory heap interface - an extension to standard
+       malloc/free interface that simplifies tasks of memory disposal 
+       when allocated structures exhibit hierarchical properties.
+
+       http://swapped.cc/halloc
+=
+       To build libhalloc.a with GNU tools run
+               make
+
+       To install in /usr/include and /usr/lib
+               make install
+
+       To cleanup the build files 
+               make clean
+=
+       halloc-1.2.1
+               * fixed a double-free bug in _set_allocator() as per
+                 Matthew Gregan's comments
+
+               * switched to using NULL instead of 0 where applicable
+
+       halloc-1.2.0
+               * added missing <string.h> include to halloc.c
+               
+               * improved standard compliance thanks to the feedback
+                 received from Stan Tobias. Two things were fixed -
+                 
+               - hblock_t structure no longer uses zero-sized 'data'
+                 array, which happened to be common, but non-standard
+                 extension; 
+                 
+               - secondly, added the code to test the behaviour of 
+                 realloc(ptr, 0). Standard allows it NOT to act as
+                 free(), in which case halloc will use its own version
+                 of allocator calling free() when necessary.
+
+       halloc-1.1.0
+               * initial public release (rewrite of hhmalloc library)
+
+=============================================================================
+Copyright (c) 2004-2010, Alex Pankratov (ap@swapped.cc). All rights reserved.
+
diff --git a/nestegg/halloc/halloc.h b/nestegg/halloc/halloc.h

new file mode 100644 (file)

index 0000000..10af4e8
--- /dev/null
+++ b/nestegg/halloc/halloc.h
@@ -0,0 +1,43 @@
+/*
+ *     Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *     Hierarchical memory allocator, 1.2.1
+ *     http://swapped.cc/halloc
+ */
+
+/*
+ *     The program is distributed under terms of BSD license. 
+ *     You can obtain the copy of the license by visiting:
+ *     
+ *     http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_HALLOC_H_
+#define _LIBP_HALLOC_H_
+
+#include <stddef.h>  /* size_t */
+
+/*
+ *     Core API: halloc(NULL, len) allocates, halloc(block, len) resizes and
+ *     halloc(block, 0) frees block together with all of its children. */
+void * halloc (void * block, size_t len);
+void   hattach(void * block, void * parent); /* NULL parent only detaches */
+
+/*
+ *     standard malloc/free api, implemented as thin wrappers over halloc()
+ */
+void * h_malloc (size_t len);
+void * h_calloc (size_t n, size_t len);  /* n * len zero-filled bytes */
+void * h_realloc(void * p, size_t len);
+void   h_free   (void * p);              /* frees p's children as well */
+char * h_strdup (const char * str);      /* halloc()-allocated copy of str */
+
+/*
+ *     the underlying allocator; while it is NULL, halloc() installs a
+ *     realloc()-based default before doing anything else */
+typedef void * (* realloc_t)(void * ptr, size_t len);
+
+extern realloc_t halloc_allocator;
+
+#endif
+
diff --git a/nestegg/halloc/src/align.h b/nestegg/halloc/src/align.h

new file mode 100644 (file)

index 0000000..4c6e183
--- /dev/null
+++ b/nestegg/halloc/src/align.h
@@ -0,0 +1,36 @@
+/*
+ *     Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *     Hierarchical memory allocator, 1.2.1
+ *     http://swapped.cc/halloc
+ */
+
+/*
+ *     The program is distributed under terms of BSD license. 
+ *     You can obtain the copy of the license by visiting:
+ *     
+ *     http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_ALIGN_H_
+#define _LIBP_ALIGN_H_
+
+/*
+ *     a type with the most strict alignment requirements: a union of the
+ *     basic scalar, data-pointer and function-pointer types listed below */
+union max_align
+{
+       char   c;
+       short  s;
+       long   l;
+       int    i;
+       float  f;
+       double d;        /* NOTE(review): no long long / long double member */
+       void * v;
+       void (*q)(void);
+};
+
+typedef union max_align max_align_t; /* NOTE(review): C11 <stddef.h> declares this name too */
+
+#endif
+
diff --git a/nestegg/halloc/src/halloc.c b/nestegg/halloc/src/halloc.c

new file mode 100644 (file)

index 0000000..38fd6c1
--- /dev/null
+++ b/nestegg/halloc/src/halloc.c
@@ -0,0 +1,254 @@
+/*
+ *     Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *     Hierarchical memory allocator, 1.2.1
+ *     http://swapped.cc/halloc
+ */
+
+/*
+ *     The program is distributed under terms of BSD license. 
+ *     You can obtain the copy of the license by visiting:
+ *     
+ *     http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#include <stdlib.h>  /* realloc */
+#include <string.h>  /* memset & co */
+
+#include "../halloc.h"
+#include "align.h"
+#include "hlist.h"
+
+/*
+ *     block control header, stored immediately in front of the user data;
+ *     halloc() hands out 'data' and recovers the header with structof() */
+typedef struct hblock
+{
+#ifndef NDEBUG
+#define HH_MAGIC    0x20040518L
+       long          magic;    /* debug builds only: set to HH_MAGIC */
+#endif
+       hlist_item_t  siblings; /* 2 pointers: link in the parent's child list */
+       hlist_head_t  children; /* 1 pointer: head of this block's child list */
+       max_align_t   data[1];  /* not allocated, see below */
+       
+} hblock_t;
+
+#define sizeof_hblock offsetof(hblock_t, data) /* header size without data[] */
+
+/*
+ *     allocator used for every block; NULL until halloc() installs a default
+ */
+realloc_t halloc_allocator = NULL;
+
+#define allocator halloc_allocator
+
+/*
+ *     static methods
+ */
+static void _set_allocator(void);
+static void * _realloc(void * ptr, size_t n);
+
+static int  _relate(hblock_t * b, hblock_t * p);
+static void _free_children(hblock_t * p);
+
+/*
+ *     Core API
+ */
+void * halloc(void * ptr, size_t len)
+{
+       hblock_t * p;
+
+       /* set up default allocator */
+       if (! allocator)
+       {
+               _set_allocator();
+               assert(allocator);
+       }
+
+       /* calloc */
+       if (! ptr)
+       {
+               if (! len)
+                       return NULL;
+
+               p = allocator(0, len + sizeof_hblock);
+               if (! p)
+                       return NULL;
+#ifndef NDEBUG
+               p->magic = HH_MAGIC;
+#endif
+               hlist_init(&p->children);
+               hlist_init_item(&p->siblings);
+
+               return p->data;
+       }
+
+       p = structof(ptr, hblock_t, data);
+       assert(p->magic == HH_MAGIC);
+
+       /* realloc */
+       if (len)
+       {
+               p = allocator(p, len + sizeof_hblock);
+               if (! p)
+                       return NULL;
+
+               hlist_relink(&p->siblings);
+               hlist_relink_head(&p->children);
+               
+               return p->data;
+       }
+
+       /* free */
+       _free_children(p);
+       hlist_del(&p->siblings);
+       allocator(p, 0);
+
+       return NULL;
+}
+
+void hattach(void * block, void * parent)
+{
+       hblock_t * child;
+       hblock_t * owner;
+
+       /* nothing to move; a parent without a block is a caller error */
+       if (block == NULL)
+       {
+               assert(parent == NULL);
+               return;
+       }
+
+       /* step 1: unlink the block from whatever list it is on now */
+       child = structof(block, hblock_t, data);
+       assert(child->magic == HH_MAGIC);
+       hlist_del(&child->siblings);
+
+       /* step 2: link it under the new parent, if one was given */
+       if (parent != NULL)
+       {
+               owner = structof(parent, hblock_t, data);
+               assert(owner->magic == HH_MAGIC);
+
+               /* a block must not become its own parent or ancestor */
+               assert(child != owner);          /* cheap check */
+               assert(! _relate(owner, child)); /* walks child's whole subtree */
+               hlist_add(&owner->children, &child->siblings);
+       }
+}
+
+/*
+ *     malloc/free api: thin wrappers that map the familiar calls onto
+ *     halloc(); h_free() therefore releases a block's children as well */
+void * h_malloc(size_t len)
+{
+       return halloc(NULL, len);
+}
+
+void * h_calloc(size_t n, size_t len)
+{
+       void * ptr = (len && n > (size_t)-1 / len) ? NULL : halloc(NULL, len * n);
+       return ptr ? memset(ptr, 0, len * n) : NULL; /* NULL: n * len wraps, is 0, or OOM */
+}
+
+void * h_realloc(void * ptr, size_t len)
+{
+       return halloc(ptr, len);
+}
+
+void   h_free(void * ptr)
+{
+       halloc(ptr, 0);
+}
+
+char * h_strdup(const char * str)
+{
+       size_t size = strlen(str) + 1;    /* room for the terminator too */
+       char * copy = halloc(NULL, size);
+       return copy ? memcpy(copy, str, size) : NULL;
+}
+
+/*
+ *     static stuff
+ */
+static void _set_allocator(void)
+{
+       /* installs the default allocator; halloc() calls this only while
+        * halloc_allocator is still NULL, i.e. the user supplied none */
+       assert(! allocator);
+
+       /*
+        *      realloc(ptr, 0) cannot be relied upon as free(): up to
+        *      C17 its behaviour is implementation-defined, and C23
+        *      makes the call undefined. an implementation may free
+        *      the block and return NULL, or it may hand back a new
+        *      zero-size block that still has to be freed.
+        *
+        *      Thanks to Stan Tobias for pointing this tricky part out.
+        *
+        *      earlier versions probed the C library at run time with
+        *      realloc(malloc(1), 0) and kept plain realloc when that
+        *      returned NULL. the probe depended on the very call in
+        *      question, so the free()-ing wrapper is now installed
+        *      unconditionally. callers see no difference: _realloc()
+        *      forwards to realloc() for every non-zero size and
+        *      calls free() for size zero.
+        */
+
+       /* never hands a zero size to realloc() */
+       allocator = _realloc;
+}
+
+static void * _realloc(void * ptr, size_t n)
+{
+       /* realloc() whose zero-size request always means free() */
+       if (n == 0)
+       {
+               free(ptr);
+               return NULL;
+       }
+       return realloc(ptr, n);
+}
+
+static int _relate(hblock_t * b, hblock_t * p)
+{
+       hlist_item_t * node;
+
+       /* returns 1 when b is found anywhere below p, 0 otherwise */
+       if (b == NULL || p == NULL)
+               return 0;
+
+       /*
+        *  blocks keep no 'parent' pointer, so ancestry cannot be checked
+        *  by walking up from b; instead the hierarchy below p is searched
+        *  child by child, recursively, and this can be VERY SLOW
+        */
+       hlist_for_each(node, &p->children)
+       {
+               hblock_t * kid = structof(node, hblock_t, siblings);
+               if (kid == b || _relate(b, kid))
+                       return 1;
+       }
+       return 0;
+}
+
+static void _free_children(hblock_t * p)
+{
+       hlist_item_t * cur, * ahead;
+#ifndef NDEBUG
+       /*
+        *      clearing the magic marks p as visited: reaching it again
+        *      through a loop in the hierarchy trips this assert
+        */
+       assert(p && p->magic == HH_MAGIC);
+       p->magic = 0;
+#endif
+       /* depth-first: free each child's own subtree, then the child */
+       hlist_for_each_safe(cur, ahead, &p->children)
+       {
+               hblock_t * kid = structof(cur, hblock_t, siblings);
+               _free_children(kid);
+               allocator(kid, 0);
+       }
+}
+
diff --git a/nestegg/halloc/src/hlist.h b/nestegg/halloc/src/hlist.h

new file mode 100644 (file)

index 0000000..2791f78
--- /dev/null
+++ b/nestegg/halloc/src/hlist.h
@@ -0,0 +1,136 @@
+/*
+ *     Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *     Hierarchical memory allocator, 1.2.1
+ *     http://swapped.cc/halloc
+ */
+
+/*
+ *     The program is distributed under terms of BSD license. 
+ *     You can obtain the copy of the license by visiting:
+ *     
+ *     http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_HLIST_H_
+#define _LIBP_HLIST_H_
+
+#include <assert.h>
+#include "macros.h"  /* static_inline */
+
+/*
+ *     weak double-linked list w/ tail sentinel
+ */
+typedef struct hlist_head  hlist_head_t;
+typedef struct hlist_item  hlist_item_t;
+
+/*
+ *     the head holds only a forward pointer; each item also stores the
+ *     address of the 'next' pointer that currently refers to it */
+struct hlist_head
+{
+       hlist_item_t * next;    /* first item, or &hlist_null when empty */
+};
+
+struct hlist_item
+{
+       hlist_item_t * next;    /* following item, or &hlist_null at the tail */
+       hlist_item_t ** prev;   /* address of the pointer that points at this item */
+};
+
+/*
+ *     shared tail sentinel - NOTE(review): this is a (tentative) definition
+ *     in a header; including it from several .c files may clash at link time */
+struct hlist_item hlist_null;
+
+/*
+ *     static initializers matching hlist_init() and hlist_init_item()
+ */
+#define __hlist_init(h)      { &hlist_null }
+#define __hlist_init_item(i) { &hlist_null, &(i).next }
+
+static_inline void hlist_init(hlist_head_t * h);
+static_inline void hlist_init_item(hlist_item_t * i);
+
+/* static_inline void hlist_purge(hlist_head_t * h); */
+
+/* static_inline bool_t hlist_empty(const hlist_head_t * h); */
+
+/* static_inline hlist_item_t * hlist_head(const hlist_head_t * h); */
+
+/* static_inline hlist_item_t * hlist_next(const hlist_item_t * i); */
+/* static_inline hlist_item_t * hlist_prev(const hlist_item_t * i, 
+                                           const hlist_head_t * h); */
+
+static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i);
+
+/* static_inline void hlist_add_prev(hlist_item_t * l, hlist_item_t * i); */
+/* static_inline void hlist_add_next(hlist_item_t * l, hlist_item_t * i); */
+
+static_inline void hlist_del(hlist_item_t * i);
+
+static_inline void hlist_relink(hlist_item_t * i);
+static_inline void hlist_relink_head(hlist_head_t * h);
+
+#define hlist_for_each(i, h) /* i->next is read after the body has run */ \
+       for (i = (h)->next; i != &hlist_null; i = i->next)
+
+#define hlist_for_each_safe(i, tmp, h) /* tmp holds i->next before the body runs */ \
+       for (i = (h)->next, tmp = i->next; \
+            i!= &hlist_null; \
+            i = tmp, tmp = i->next)
+
+/*
+ *     static
+ */
+static_inline void hlist_init(hlist_head_t * h)
+{
+       assert(h);
+       h->next = &hlist_null;   /* empty list: head points straight at the sentinel */
+}
+
+static_inline void hlist_init_item(hlist_item_t * i)
+{
+       assert(i);
+       i->prev = &i->next;      /* unlinked item: prev refers to its own next field */
+       i->next = &hlist_null;
+}
+
+static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i)
+{
+       hlist_item_t * next;
+       assert(h && i);
+       
+       next = i->next = h->next;  /* i goes in front of the current first item */
+       next->prev = &i->next;     /* also executed when next is &hlist_null */
+       h->next = i;               /* i becomes the first item of h */
+       i->prev = &h->next;
+}
+
+static_inline void hlist_del(hlist_item_t * i)
+{
+       hlist_item_t * next;
+       assert(i);
+
+       next = i->next;
+       next->prev = i->prev;    /* successor takes over i's back-link */
+       *i->prev = next;         /* the pointer that referred to i now skips it */
+       
+       hlist_init_item(i);      /* leave i in the unlinked state */
+}
+
+static_inline void hlist_relink(hlist_item_t * i)
+{
+       assert(i);
+       *i->prev = i;               /* incoming pointer -> i's current address */
+       i->next->prev = &i->next;   /* successor's back-link -> i's current next field */
+}
+
+static_inline void hlist_relink_head(hlist_head_t * h)
+{
+       assert(h);
+       h->next->prev = &h->next;   /* first item's back-link -> h's current next field */
+}
+
+#endif
+
diff --git a/nestegg/halloc/src/macros.h b/nestegg/halloc/src/macros.h

new file mode 100644 (file)

index 0000000..c36b516
--- /dev/null
+++ b/nestegg/halloc/src/macros.h
@@ -0,0 +1,36 @@
+/*
+ *     Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *     Hierarchical memory allocator, 1.2.1
+ *     http://swapped.cc/halloc
+ */
+
+/*
+ *     The program is distributed under terms of BSD license. 
+ *     You can obtain the copy of the license by visiting:
+ *     
+ *     http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_MACROS_H_
+#define _LIBP_MACROS_H_
+
+#include <stddef.h>  /* offsetof */
+
+/*
+ *     restore pointer to the structure by a pointer to its field:
+ *     p points at field f inside an object of type t */
+#define structof(p,t,f) ((t*)(- offsetof(t,f) + (char*)(p)))
+
+/*
+ *     redefine for the target compiler: selects the inline keyword spelling
+ */
+#ifdef _WIN32
+#define static_inline static __inline
+#else
+#define static_inline static __inline__
+#endif
+
+
+#endif
+
diff --git a/nestegg/include/nestegg/nestegg.h b/nestegg/include/nestegg/nestegg.h

new file mode 100644 (file)

index 0000000..7447d14
--- /dev/null
+++ b/nestegg/include/nestegg/nestegg.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#ifndef   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
+#define   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
+
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @mainpage
+
+    @section intro Introduction
+
+    This is the documentation for the <tt>libnestegg</tt> C API.
+    <tt>libnestegg</tt> is a demultiplexing library for <a
+    href="http://www.matroska.org/">Matroska</a> and <a
+    href="http://www.webmproject.org/">WebMedia</a> media files.
+
+    @section example Example code
+
+    @code
+    nestegg * demux_ctx;
+    nestegg_init(&demux_ctx, io, NULL);
+
+    nestegg_packet * pkt;
+    while ((r = nestegg_read_packet(demux_ctx, &pkt)) > 0) {
+      unsigned int track;
+
+      nestegg_packet_track(pkt, &track);
+
+      // This example decodes the first track only.
+      if (track == 0) {
+        unsigned int chunk, chunks;
+
+        nestegg_packet_count(pkt, &chunks);
+
+        // Decode each chunk of data.
+        for (chunk = 0; chunk < chunks; ++chunk) {
+          unsigned char * data;
+          size_t data_size;
+
+          nestegg_packet_data(pkt, chunk, &data, &data_size);
+
+          example_codec_decode(codec_ctx, data, data_size);
+        }
+      }
+
+      nestegg_free_packet(pkt);
+    }
+
+    nestegg_destroy(demux_ctx);
+    @endcode
+*/
+
+
+/** @file
+    The <tt>libnestegg</tt> C API. */
+
+#define NESTEGG_TRACK_VIDEO 0 /**< Track is of type video. */
+#define NESTEGG_TRACK_AUDIO 1 /**< Track is of type audio. */
+
+#define NESTEGG_CODEC_VP8    0 /**< Track uses Google On2 VP8 codec. */
+#define NESTEGG_CODEC_VORBIS 1 /**< Track uses Xiph Vorbis codec. */
+
+#define NESTEGG_SEEK_SET 0 /**< Seek offset relative to beginning of stream. */
+#define NESTEGG_SEEK_CUR 1 /**< Seek offset relative to current position in stream. */
+#define NESTEGG_SEEK_END 2 /**< Seek offset relative to end of stream. */
+
+#define NESTEGG_LOG_DEBUG    1     /**< Debug level log message. */
+#define NESTEGG_LOG_INFO     10    /**< Informational level log message. */
+#define NESTEGG_LOG_WARNING  100   /**< Warning level log message. */
+#define NESTEGG_LOG_ERROR    1000  /**< Error level log message. */
+#define NESTEGG_LOG_CRITICAL 10000 /**< Critical level log message. */
+
+typedef struct nestegg nestegg;               /**< Opaque handle referencing the stream state. */
+typedef struct nestegg_packet nestegg_packet; /**< Opaque handle referencing a packet of data. */
+
+/** User supplied IO context. */
+typedef struct {
+  /** User supplied read callback.
+      @param buffer   Buffer to read data into.
+      @param length   Length of supplied buffer in bytes.
+      @param userdata The #userdata supplied by the user.
+      @retval  1 Read succeeded.
+      @retval  0 End of stream.
+      @retval -1 Error. */
+  int (* read)(void * buffer, size_t length, void * userdata);
+
+  /** User supplied seek callback.
+      @param offset   Offset within the stream to seek to.
+      @param whence   Seek direction.  One of #NESTEGG_SEEK_SET,
+                      #NESTEGG_SEEK_CUR, or #NESTEGG_SEEK_END.
+      @param userdata The #userdata supplied by the user.
+      @retval  0 Seek succeeded.
+      @retval -1 Error. */
+  int (* seek)(int64_t offset, int whence, void * userdata);
+
+  /** User supplied tell callback.
+      @param userdata The #userdata supplied by the user.
+      @returns Current position within the stream.
+      @retval -1 Error. */
+  int64_t (* tell)(void * userdata);
+
+  /** User supplied pointer to be passed to the IO callbacks. */
+  void * userdata;
+} nestegg_io;
+
+/** Parameters specific to a video track. */
+typedef struct {
+  unsigned int width;          /**< Width of the video frame in pixels. */
+  unsigned int height;         /**< Height of the video frame in pixels. */
+  unsigned int display_width;  /**< Display width of the video frame in pixels. */
+  unsigned int display_height; /**< Display height of the video frame in pixels. */
+  unsigned int crop_bottom;    /**< Pixels to crop from the bottom of the frame. */
+  unsigned int crop_top;       /**< Pixels to crop from the top of the frame. */
+  unsigned int crop_left;      /**< Pixels to crop from the left of the frame. */
+  unsigned int crop_right;     /**< Pixels to crop from the right of the frame. */
+} nestegg_video_params;
+
+/** Parameters specific to an audio track. */
+typedef struct {
+  double rate;           /**< Sampling rate in Hz. */
+  unsigned int channels; /**< Number of audio channels. */
+  unsigned int depth;    /**< Bits per sample. */
+} nestegg_audio_params;
+
+/** Logging callback function pointer. */
+typedef void (* nestegg_log)(nestegg * context, unsigned int severity, char const * format, ...);
+
+/** Initialize a nestegg context.  During initialization the parser will
+    read forward in the stream processing all elements until the first
+    block of media is reached.  All track metadata has been processed at this point.
+    @param context  Storage for the new nestegg context.  @see nestegg_destroy
+    @param io       User supplied IO context.
+    @param callback Optional logging callback function pointer.  May be NULL.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback);
+
+/** Destroy a nestegg context and free associated memory.
+    @param context #nestegg context to be freed.  @see nestegg_init */
+void nestegg_destroy(nestegg * context);
+
+/** Query the duration of the media stream in nanoseconds.
+    @param context  Stream context initialized by #nestegg_init.
+    @param duration Storage for the queried duration.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_duration(nestegg * context, uint64_t * duration);
+
+/** Query the tstamp scale of the media stream in nanoseconds.
+    Timecodes presented by nestegg have been scaled by this value
+    before presentation to the caller.
+    @param context Stream context initialized by #nestegg_init.
+    @param scale   Storage for the queried scale factor.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_tstamp_scale(nestegg * context, uint64_t * scale);
+
+/** Query the number of tracks in the media stream.
+    @param context Stream context initialized by #nestegg_init.
+    @param tracks  Storage for the queried track count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_count(nestegg * context, unsigned int * tracks);
+
+/** Seek @a track to @a tstamp.  Stream seek will terminate at the earliest
+    key point in the stream at or before @a tstamp.  Other tracks in the
+    stream will output packets with unspecified but nearby timestamps.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param tstamp  Absolute timestamp in nanoseconds.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_seek(nestegg * context, unsigned int track, uint64_t tstamp);
+
+/** Query the type specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @retval #NESTEGG_TRACK_VIDEO Track type is video.
+    @retval #NESTEGG_TRACK_AUDIO Track type is audio.
+    @retval -1 Error. */
+int nestegg_track_type(nestegg * context, unsigned int track);
+
+/** Query the codec ID specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @retval #NESTEGG_CODEC_VP8    Track codec is VP8.
+    @retval #NESTEGG_CODEC_VORBIS Track codec is Vorbis.
+    @retval -1 Error. */
+int nestegg_track_codec_id(nestegg * context, unsigned int track);
+
+/** Query the number of codec initialization chunks for @a track.  Each
+    chunk of data should be passed to the codec initialization functions in
+    the order returned.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param count   Storage for the queried chunk count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_codec_data_count(nestegg * context, unsigned int track,
+                                   unsigned int * count);
+
+/** Get a pointer to chunk number @a item of codec initialization data for
+    @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param item    Zero based chunk item number.
+    @param data    Storage for the queried data pointer.
+                   The data is owned by the #nestegg context.
+    @param length  Storage for the queried data size.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_codec_data(nestegg * context, unsigned int track, unsigned int item,
+                             unsigned char ** data, size_t * length);
+
+/** Query the video parameters specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param params  Storage for the queried video parameters.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_video_params(nestegg * context, unsigned int track,
+                               nestegg_video_params * params);
+
+/** Query the audio parameters specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param params  Storage for the queried audio parameters.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_audio_params(nestegg * context, unsigned int track,
+                               nestegg_audio_params * params);
+
+/** Read a packet of media data.  A packet consists of one or more chunks of
+    data associated with a single track.  nestegg_read_packet should be
+    called in a loop while the return value is 1 to drive the stream parser
+    forward.  @see nestegg_free_packet
+    @param context Context returned by #nestegg_init.
+    @param packet  Storage for the returned nestegg_packet.
+    @retval  1 Additional packets may be read in subsequent calls.
+    @retval  0 End of stream.
+    @retval -1 Error. */
+int nestegg_read_packet(nestegg * context, nestegg_packet ** packet);
+
+/** Destroy a nestegg_packet and free associated memory.
+    @param packet #nestegg_packet to be freed. @see nestegg_read_packet */
+void nestegg_free_packet(nestegg_packet * packet);
+
+/** Query the track number of @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param track  Storage for the queried zero based track index.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_track(nestegg_packet * packet, unsigned int * track);
+
+/** Query the time stamp in nanoseconds of @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param tstamp Storage for the queried timestamp in nanoseconds.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_tstamp(nestegg_packet * packet, uint64_t * tstamp);
+
+/** Query the number of data chunks contained in @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param count  Storage for the queried chunk count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_count(nestegg_packet * packet, unsigned int * count);
+
+/** Get a pointer to chunk number @a item of packet data.
+    @param packet  Packet initialized by #nestegg_read_packet.
+    @param item    Zero based chunk item number.
+    @param data    Storage for the queried data pointer.
+                   The data is owned by the #nestegg_packet packet.
+    @param length  Storage for the queried data size.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_data(nestegg_packet * packet, unsigned int item,
+                        unsigned char ** data, size_t * length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79 */
diff --git a/nestegg/m4/as-ac-expand.m4 b/nestegg/m4/as-ac-expand.m4

new file mode 100644 (file)

index 0000000..d6c9e33
--- /dev/null
+++ b/nestegg/m4/as-ac-expand.m4
@@ -0,0 +1,43 @@
+dnl as-ac-expand.m4 0.2.0
+dnl autostars m4 macro for expanding directories using configure's prefix
+dnl thomas@apestaart.org
+
+dnl AS_AC_EXPAND(VAR, CONFIGURE_VAR)
+dnl Expands CONFIGURE_VAR by evaluating it in the shell repeatedly until the
+dnl value stops changing, then AC_SUBSTs the final value under the name VAR.
+dnl While expanding, prefix and exec_prefix are given defaults if they are
+dnl still NONE; both are restored to their saved values before returning.
+dnl example
+dnl AS_AC_EXPAND(SYSCONFDIR, $sysconfdir)
+dnl will set SYSCONFDIR to /usr/local/etc if prefix=/usr/local
+
+AC_DEFUN([AS_AC_EXPAND],
+[
+  dnl NOTE(review): EXP_VAR is assigned here but never read in this macro
+  EXP_VAR=[$1]
+  FROM_VAR=[$2]
+
+  dnl first expand prefix and exec_prefix if necessary
+  dnl (the originals are saved so they can be put back at the end)
+  prefix_save=$prefix
+  exec_prefix_save=$exec_prefix
+
+  dnl if no prefix given, then use /usr/local, the default prefix
+  if test "x$prefix" = "xNONE"; then
+    prefix="$ac_default_prefix"
+  fi
+  dnl if no exec_prefix given, then use prefix
+  if test "x$exec_prefix" = "xNONE"; then
+    exec_prefix=$prefix
+  fi
+
+  full_var="$FROM_VAR"
+  dnl loop until it doesn't change anymore
+  dnl (each pass lets the shell substitute the variable references that the
+  dnl previous pass left in the string)
+  while true; do
+    new_full_var="`eval echo $full_var`"
+    if test "x$new_full_var" = "x$full_var"; then break; fi
+    full_var=$new_full_var
+  done
+
+  dnl clean up
+  dnl (the loop only exits once both variables are equal, so this assignment
+  dnl does not change the value)
+  full_var=$new_full_var
+  AC_SUBST([$1], "$full_var")
+
+  dnl restore prefix and exec_prefix
+  prefix=$prefix_save
+  exec_prefix=$exec_prefix_save
+])
diff --git a/nestegg/m4/ax_create_stdint_h.m4 b/nestegg/m4/ax_create_stdint_h.m4

new file mode 100644 (file)

index 0000000..228105b
--- /dev/null
+++ b/nestegg/m4/ax_create_stdint_h.m4
@@ -0,0 +1,695 @@
+dnl @synopsis AX_CREATE_STDINT_H [( HEADER-TO-GENERATE [, HEADERS-TO-CHECK])]
+dnl
+dnl the "ISO C9X: 7.18 Integer types <stdint.h>" section requires the
+dnl existence of an include file <stdint.h> that defines a set of
+dnl typedefs, especially uint8_t,int32_t,uintptr_t. Many older
+dnl installations will not provide this file, but some will have the
+dnl very same definitions in <inttypes.h>. In other environments we can
+dnl use the inet-types in <sys/types.h> which would define the typedefs
+dnl int8_t and u_int8_t respectively.
+dnl
+dnl This macro will create a local "_stdint.h" or the headerfile given
+dnl as an argument. In many cases that file will just "#include
+dnl <stdint.h>" or "#include <inttypes.h>", while in other environments
+dnl it will provide the set of basic 'stdint's definitions/typedefs:
+dnl
+dnl   int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,intptr_t,uintptr_t
+dnl   int_least32_t.. int_fast32_t.. intmax_t
+dnl
+dnl which may or may not rely on the definitions of other files, or
+dnl using the AC_CHECK_SIZEOF macro to determine the actual sizeof each
+dnl type.
+dnl
+dnl if your header files require the stdint-types you will want to
+dnl create an installable file mylib-int.h that all your other
+dnl installable header may include. So if you have a library package
+dnl named "mylib", just use
+dnl
+dnl      AX_CREATE_STDINT_H(mylib-int.h)
+dnl
+dnl in configure.ac and go to install that very header file in
+dnl Makefile.am along with the other headers (mylib.h) - and the
+dnl mylib-specific headers can simply use "#include <mylib-int.h>" to
+dnl obtain the stdint-types.
+dnl
+dnl Remember, if the system already had a valid <stdint.h>, the
+dnl generated file will include it directly. No need for fuzzy
+dnl HAVE_STDINT_H things... (oops, GCC 4.2.x has deliberately disabled
+dnl its stdint.h for non-c99 compilation and the c99-mode is not the
+dnl default. Therefore this macro will not use the compiler's stdint.h
+dnl - please complain to the GCC developers).
+dnl
+dnl @category C
+dnl @author Guido U. Draheim <guidod@gmx.de>
+dnl @version 2006-10-13
+dnl @license GPLWithACException
+
+dnl AX_CHECK_DATA_MODEL
+dnl Runs AC_CHECK_SIZEOF for char, short, int, long and void* and classifies
+dnl the result.  The sizes of char/short/int are concatenated into
+dnl ac_cv_char_data_model and the sizes of int/long/void* into
+dnl ac_cv_long_data_model.  The pair "char-model/long-model" is then matched
+dnl against a table of known combinations to set ac_cv_data_model; a short
+dnl human readable description is kept in the shell variable n and printed
+dnl with the result.
+AC_DEFUN([AX_CHECK_DATA_MODEL],[
+   AC_CHECK_SIZEOF(char)
+   AC_CHECK_SIZEOF(short)
+   AC_CHECK_SIZEOF(int)
+   AC_CHECK_SIZEOF(long)
+   AC_CHECK_SIZEOF(void*)
+   ac_cv_char_data_model=""
+   ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_char"
+   ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_short"
+   ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_int"
+   ac_cv_long_data_model=""
+   ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_int"
+   ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_long"
+   ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_voidp"
+   AC_MSG_CHECKING([data model])
+   dnl the first matching pattern wins, so every exact model is listed
+   dnl before the wildcard fallback that shares its char-model prefix
+   case "$ac_cv_char_data_model/$ac_cv_long_data_model" in
+    122/242)     ac_cv_data_model="IP16"  ; n="standard 16bit machine" ;;
+    122/244)     ac_cv_data_model="LP32"  ; n="standard 32bit machine" ;;
+    122/*)       ac_cv_data_model="i16"   ; n="unusual int16 model" ;;
+    124/444)     ac_cv_data_model="ILP32" ; n="standard 32bit unixish" ;;
+    124/488)     ac_cv_data_model="LP64"  ; n="standard 64bit unixish" ;;
+    124/448)     ac_cv_data_model="LLP64" ; n="unusual 64bit unixish" ;;
+    124/*)       ac_cv_data_model="i32"   ; n="unusual int32 model" ;;
+    128/888)     ac_cv_data_model="ILP64" ; n="unusual 64bit numeric" ;;
+    128/*)       ac_cv_data_model="i64"   ; n="unusual int64 model" ;;
+    222/*2)      ac_cv_data_model="DSP16" ; n="strict 16bit dsptype" ;;
+    333/*3)      ac_cv_data_model="DSP24" ; n="strict 24bit dsptype" ;;
+    444/*4)      ac_cv_data_model="DSP32" ; n="strict 32bit dsptype" ;;
+    666/*6)      ac_cv_data_model="DSP48" ; n="strict 48bit dsptype" ;;
+    888/*8)      ac_cv_data_model="DSP64" ; n="strict 64bit dsptype" ;;
+    222/*|333/*|444/*|666/*|888/*) :
+                 ac_cv_data_model="iDSP"  ; n="unusual dsptype" ;;
+     *)          ac_cv_data_model="none"  ; n="very unusual model" ;;
+   esac
+   AC_MSG_RESULT([$ac_cv_data_model ($ac_cv_long_data_model, $n)])
+])
+
+dnl AX_CHECK_HEADER_STDINT_X([HEADERLIST][,ACTION-IF])
+dnl Walks HEADERLIST (a built-in default list when it is omitted) and caches
+dnl in ac_cv_header_stdint_x the first header that declares uintptr_t.
+dnl Headers that lack uintptr_t are skipped.  For the accepted header the
+dnl shell variable and64 records whether uint64_t is declared as well, then
+dnl ACTION-IF is run and the search stops.  The success action must expand
+dnl the second macro argument: expanding the first one would paste the
+dnl header list into configure as if it were a shell command.
+AC_DEFUN([AX_CHECK_HEADER_STDINT_X],[
+AC_CACHE_CHECK([for stdint uintptr_t], [ac_cv_header_stdint_x],[
+ ac_cv_header_stdint_x="" # the 1997 typedefs (inttypes.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[stdint.h inttypes.h sys/inttypes.h sys/types.h])
+  do
+   unset ac_cv_type_uintptr_t
+   unset ac_cv_type_uint64_t
+   AC_CHECK_TYPE(uintptr_t,[ac_cv_header_stdint_x=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
+   m4_ifvaln([$2],[$2]) break
+  done
+  AC_MSG_CHECKING([for stdint uintptr_t])
+ ])
+])
+
+dnl AX_CHECK_HEADER_STDINT_O([HEADERLIST][,ACTION-IF])
+dnl Walks HEADERLIST (a built-in default list when it is omitted) and caches
+dnl in ac_cv_header_stdint_o the first header that declares uint32_t.
+dnl Headers that lack uint32_t are skipped.  For the accepted header the
+dnl shell variable and64 records whether uint64_t is declared as well, then
+dnl ACTION-IF is run and the search stops.  The success action must expand
+dnl the second macro argument: expanding the first one would paste the
+dnl header list into configure as if it were a shell command.
+AC_DEFUN([AX_CHECK_HEADER_STDINT_O],[
+AC_CACHE_CHECK([for stdint uint32_t], [ac_cv_header_stdint_o],[
+ ac_cv_header_stdint_o="" # the 1995 typedefs (sys/inttypes.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[inttypes.h sys/inttypes.h sys/types.h stdint.h])
+  do
+   unset ac_cv_type_uint32_t
+   unset ac_cv_type_uint64_t
+   AC_CHECK_TYPE(uint32_t,[ac_cv_header_stdint_o=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
+   m4_ifvaln([$2],[$2]) break
+  done
+  AC_MSG_CHECKING([for stdint uint32_t])
+ ])
+])
+
+dnl AX_CHECK_HEADER_STDINT_U([HEADERLIST][,ACTION-IF])
+dnl Walks HEADERLIST (a built-in default list when it is omitted) and caches
+dnl in ac_cv_header_stdint_u the first header that declares u_int32_t.
+dnl Headers that lack u_int32_t are skipped.  For the accepted header the
+dnl shell variable and64 records whether u_int64_t is declared as well, then
+dnl ACTION-IF is run and the search stops.  The success action must expand
+dnl the second macro argument: expanding the first one would paste the
+dnl header list into configure as if it were a shell command.
+AC_DEFUN([AX_CHECK_HEADER_STDINT_U],[
+AC_CACHE_CHECK([for stdint u_int32_t], [ac_cv_header_stdint_u],[
+ ac_cv_header_stdint_u="" # the BSD typedefs (sys/types.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[sys/types.h inttypes.h sys/inttypes.h]) ; do
+   unset ac_cv_type_u_int32_t
+   unset ac_cv_type_u_int64_t
+   AC_CHECK_TYPE(u_int32_t,[ac_cv_header_stdint_u=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(u_int64_t,[and64="/u_int64_t"],[and64=""],[#include<$i>])
+   m4_ifvaln([$2],[$2]) break
+  done
+  AC_MSG_CHECKING([for stdint u_int32_t])
+ ])
+])
+
+AC_DEFUN([AX_CREATE_STDINT_H],
+[# ------ AX CREATE STDINT H -------------------------------------
+AC_MSG_CHECKING([for stdint types])
+ac_stdint_h=`echo ifelse($1, , _stdint.h, $1)`
+# try to shortcircuit - if the default include path of the compiler
+# can find a "stdint.h" header then we assume that all compilers can.
+AC_CACHE_VAL([ac_cv_header_stdint_t],[
+old_CXXFLAGS="$CXXFLAGS" ; CXXFLAGS=""
+old_CPPFLAGS="$CPPFLAGS" ; CPPFLAGS=""
+old_CFLAGS="$CFLAGS"     ; CFLAGS=""
+AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
+[ac_cv_stdint_result="(assuming C99 compatible system)"
+ ac_cv_header_stdint_t="stdint.h"; ],
+[ac_cv_header_stdint_t=""])
+if test "$GCC" = "yes" && test ".$ac_cv_header_stdint_t" = "."; then
+CFLAGS="-std=c99"
+AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
+[AC_MSG_WARN(your GCC compiler has a defunct stdint.h for its default-mode)])
+fi
+CXXFLAGS="$old_CXXFLAGS"
+CPPFLAGS="$old_CPPFLAGS"
+CFLAGS="$old_CFLAGS" ])
+
+v="... $ac_cv_header_stdint_h"
+if test "$ac_stdint_h" = "stdint.h" ; then
+ AC_MSG_RESULT([(are you sure you want them in ./stdint.h?)])
+elif test "$ac_stdint_h" = "inttypes.h" ; then
+ AC_MSG_RESULT([(are you sure you want them in ./inttypes.h?)])
+elif test "_$ac_cv_header_stdint_t" = "_" ; then
+ AC_MSG_RESULT([(putting them into $ac_stdint_h)$v])
+else
+ ac_cv_header_stdint="$ac_cv_header_stdint_t"
+ AC_MSG_RESULT([$ac_cv_header_stdint (shortcircuit)])
+fi
+
+if test "_$ac_cv_header_stdint_t" = "_" ; then # can not shortcircuit..
+
+dnl .....intro message done, now do a few system checks.....
+dnl btw, all old CHECK_TYPE macros do automatically "DEFINE" a type,
+dnl therefore we use the autoconf implementation detail CHECK_TYPE_NEW
+dnl instead that is triggered with 3 or more arguments (see types.m4)
+
+inttype_headers=`echo $2 | sed -e 's/,/ /g'`
+
+ac_cv_stdint_result="(no helpful system typedefs seen)"
+AX_CHECK_HEADER_STDINT_X(dnl
+   stdint.h inttypes.h sys/inttypes.h $inttype_headers,
+   ac_cv_stdint_result="(seen uintptr_t$and64 in $i)")
+
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+AX_CHECK_HEADER_STDINT_O(dnl,
+   inttypes.h sys/inttypes.h stdint.h $inttype_headers,
+   ac_cv_stdint_result="(seen uint32_t$and64 in $i)")
+fi
+
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+if test "_$ac_cv_header_stdint_o" = "_" ; then
+AX_CHECK_HEADER_STDINT_U(dnl,
+   sys/types.h inttypes.h sys/inttypes.h $inttype_headers,
+   ac_cv_stdint_result="(seen u_int32_t$and64 in $i)")
+fi fi
+
+dnl if there was no good C99 header file, do some typedef checks...
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+   AC_MSG_CHECKING([for stdint datatype model])
+   AC_MSG_RESULT([(..)])
+   AX_CHECK_DATA_MODEL
+fi
+
+if test "_$ac_cv_header_stdint_x" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_x"
+elif  test "_$ac_cv_header_stdint_o" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_o"
+elif  test "_$ac_cv_header_stdint_u" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_u"
+else
+   ac_cv_header_stdint="stddef.h"
+fi
+
+AC_MSG_CHECKING([for extra inttypes in chosen header])
+AC_MSG_RESULT([($ac_cv_header_stdint)])
+dnl see if int_least and int_fast types are present in _this_ header.
+unset ac_cv_type_int_least32_t
+unset ac_cv_type_int_fast32_t
+AC_CHECK_TYPE(int_least32_t,,,[#include <$ac_cv_header_stdint>])
+AC_CHECK_TYPE(int_fast32_t,,,[#include<$ac_cv_header_stdint>])
+AC_CHECK_TYPE(intmax_t,,,[#include <$ac_cv_header_stdint>])
+
+fi # shortcircut to system "stdint.h"
+# ------------------ PREPARE VARIABLES ------------------------------
+if test "$GCC" = "yes" ; then
+ac_cv_stdint_message="using gnu compiler "`$CC --version | head -1`
+else
+ac_cv_stdint_message="using $CC"
+fi
+
+AC_MSG_RESULT([make use of $ac_cv_header_stdint in $ac_stdint_h dnl
+$ac_cv_stdint_result])
+
+dnl -----------------------------------------------------------------
+# ----------------- DONE inttypes.h checks START header -------------
+AC_CONFIG_COMMANDS([$ac_stdint_h],[
+AC_MSG_NOTICE(creating $ac_stdint_h : $_ac_stdint_h)
+ac_stdint=$tmp/_stdint.h
+
+echo "#ifndef" $_ac_stdint_h >$ac_stdint
+echo "#define" $_ac_stdint_h "1" >>$ac_stdint
+echo "#ifndef" _GENERATED_STDINT_H >>$ac_stdint
+echo "#define" _GENERATED_STDINT_H '"'$PACKAGE $VERSION'"' >>$ac_stdint
+echo "/* generated $ac_cv_stdint_message */" >>$ac_stdint
+if test "_$ac_cv_header_stdint_t" != "_" ; then
+echo "#define _STDINT_HAVE_STDINT_H" "1" >>$ac_stdint
+echo "#include <stdint.h>" >>$ac_stdint
+echo "#endif" >>$ac_stdint
+echo "#endif" >>$ac_stdint
+else
+
+cat >>$ac_stdint <<STDINT_EOF
+
+/* ................... shortcircuit part ........................... */
+
+#if defined HAVE_STDINT_H || defined _STDINT_HAVE_STDINT_H
+#include <stdint.h>
+#else
+#include <stddef.h>
+
+/* .................... configured part ............................ */
+
+STDINT_EOF
+
+echo "/* whether we have a C99 compatible stdint header file */" >>$ac_stdint
+if test "_$ac_cv_header_stdint_x" != "_" ; then
+  ac_header="$ac_cv_header_stdint_x"
+  echo "#define _STDINT_HEADER_INTPTR" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_INTPTR */" >>$ac_stdint
+fi
+
+echo "/* whether we have a C96 compatible inttypes header file */" >>$ac_stdint
+if  test "_$ac_cv_header_stdint_o" != "_" ; then
+  ac_header="$ac_cv_header_stdint_o"
+  echo "#define _STDINT_HEADER_UINT32" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_UINT32 */" >>$ac_stdint
+fi
+
+echo "/* whether we have a BSD compatible inet types header */" >>$ac_stdint
+if  test "_$ac_cv_header_stdint_u" != "_" ; then
+  ac_header="$ac_cv_header_stdint_u"
+  echo "#define _STDINT_HEADER_U_INT32" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_U_INT32 */" >>$ac_stdint
+fi
+
+echo "" >>$ac_stdint
+
+if test "_$ac_header" != "_" ; then if test "$ac_header" != "stddef.h" ; then
+  echo "#include <$ac_header>" >>$ac_stdint
+  echo "" >>$ac_stdint
+fi fi
+
+echo "/* which 64bit typedef has been found */" >>$ac_stdint
+if test "$ac_cv_type_uint64_t" = "yes" ; then
+echo "#define   _STDINT_HAVE_UINT64_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_UINT64_T */" >>$ac_stdint
+fi
+if test "$ac_cv_type_u_int64_t" = "yes" ; then
+echo "#define   _STDINT_HAVE_U_INT64_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_U_INT64_T */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+echo "/* which type model has been detected */" >>$ac_stdint
+if test "_$ac_cv_char_data_model" != "_" ; then
+echo "#define   _STDINT_CHAR_MODEL" "$ac_cv_char_data_model" >>$ac_stdint
+echo "#define   _STDINT_LONG_MODEL" "$ac_cv_long_data_model" >>$ac_stdint
+else
+echo "/* #undef _STDINT_CHAR_MODEL // skipped */" >>$ac_stdint
+echo "/* #undef _STDINT_LONG_MODEL // skipped */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+echo "/* whether int_least types were detected */" >>$ac_stdint
+if test "$ac_cv_type_int_least32_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INT_LEAST32_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INT_LEAST32_T */" >>$ac_stdint
+fi
+echo "/* whether int_fast types were detected */" >>$ac_stdint
+if test "$ac_cv_type_int_fast32_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INT_FAST32_T" "1" >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INT_FAST32_T */" >>$ac_stdint
+fi
+echo "/* whether intmax_t type was detected */" >>$ac_stdint
+if test "$ac_cv_type_intmax_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INTMAX_T" "1" >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INTMAX_T */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+  cat >>$ac_stdint <<STDINT_EOF
+/* .................... detections part ............................ */
+
+/* whether we need to define bitspecific types from compiler base types */
+#ifndef _STDINT_HEADER_INTPTR
+#ifndef _STDINT_HEADER_UINT32
+#ifndef _STDINT_HEADER_U_INT32
+#define _STDINT_NEED_INT_MODEL_T
+#else
+#define _STDINT_HAVE_U_INT_TYPES
+#endif
+#endif
+#endif
+
+#ifdef _STDINT_HAVE_U_INT_TYPES
+#undef _STDINT_NEED_INT_MODEL_T
+#endif
+
+#ifdef  _STDINT_CHAR_MODEL
+#if     _STDINT_CHAR_MODEL+0 == 122 || _STDINT_CHAR_MODEL+0 == 124
+#ifndef _STDINT_BYTE_MODEL
+#define _STDINT_BYTE_MODEL 12
+#endif
+#endif
+#endif
+
+#ifndef _STDINT_HAVE_INT_LEAST32_T
+#define _STDINT_NEED_INT_LEAST_T
+#endif
+
+#ifndef _STDINT_HAVE_INT_FAST32_T
+#define _STDINT_NEED_INT_FAST_T
+#endif
+
+#ifndef _STDINT_HEADER_INTPTR
+#define _STDINT_NEED_INTPTR_T
+#ifndef _STDINT_HAVE_INTMAX_T
+#define _STDINT_NEED_INTMAX_T
+#endif
+#endif
+
+
+/* .................... definition part ............................ */
+
+/* some system headers have good uint64_t */
+#ifndef _HAVE_UINT64_T
+#if     defined _STDINT_HAVE_UINT64_T  || defined HAVE_UINT64_T
+#define _HAVE_UINT64_T
+#elif   defined _STDINT_HAVE_U_INT64_T || defined HAVE_U_INT64_T
+#define _HAVE_UINT64_T
+typedef u_int64_t uint64_t;
+#endif
+#endif
+
+#ifndef _HAVE_UINT64_T
+/* .. here are some common heuristics using compiler runtime specifics */
+dnl   /* the C99 test has to compare the value of __STDC_VERSION__ itself: */
+dnl   /* the result of the defined operator is only 0 or 1, so comparing   */
+dnl   /* that result with 199901L could never select this branch           */
+#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+
+#elif !defined __STRICT_ANSI__
+#if defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__
+#define _HAVE_UINT64_T
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+#elif defined __GNUC__ || defined __MWERKS__ || defined __ELF__
+/* note: all ELF-systems seem to have loff-support which needs 64-bit */
+#if !defined _NO_LONGLONG
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+#endif
+
+#elif defined __alpha || (defined __mips && defined _ABIN32)
+#if !defined _NO_LONGLONG
+typedef long int64_t;
+typedef unsigned long uint64_t;
+#endif
+  /* compiler/cpu type to define int64_t */
+#endif
+#endif
+#endif
+
+#if defined _STDINT_HAVE_U_INT_TYPES
+/* int8_t int16_t int32_t defined by inet code, redeclare the u_intXX types */
+typedef u_int8_t uint8_t;
+typedef u_int16_t uint16_t;
+typedef u_int32_t uint32_t;
+
+/* glibc compatibility */
+#ifndef __int8_t_defined
+#define __int8_t_defined
+#endif
+#endif
+
+#ifdef _STDINT_NEED_INT_MODEL_T
+/* we must guess all the basic types. Apart from byte-adressable system, */
+/* there a few 32-bit-only dsp-systems that we guard with BYTE_MODEL 8-} */
+/* (btw, those nibble-addressable systems are way off, or so we assume) */
+
+dnl   /* have a look at "64bit and data size neutrality" at */
+dnl   /* http://unix.org/version2/whatsnew/login_64bit.html */
+dnl   /* (the shorthand "ILP" types always have a "P" part) */
+
+#if defined _STDINT_BYTE_MODEL
+#if _STDINT_LONG_MODEL+0 == 242
+/* 2:4:2 =  IP16 = a normal 16-bit system                */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned long   uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          long    int32_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL == 444
+/* 2:4:4 =  LP32 = a 32-bit system derived from a 16-bit */
+/* 4:4:4 = ILP32 = a normal 32-bit system                */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 484 || _STDINT_LONG_MODEL+0 == 488
+/* 4:8:4 =  IP32 = a 32-bit system prepared for 64-bit    */
+/* 4:8:8 =  LP64 = a normal 64-bit system                 */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+/* this system has a "long" of 64bit */
+#ifndef _HAVE_UINT64_T
+#define _HAVE_UINT64_T
+typedef unsigned long   uint64_t;
+typedef          long    int64_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 448
+/*      LLP64   a 64-bit system derived from a 32-bit system */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+/* assuming the system has a "long long" */
+#ifndef _HAVE_UINT64_T
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef unsigned long long uint64_t;
+typedef          long long  int64_t;
+#endif
+#else
+#define _STDINT_NO_INT32_T
+#endif
+#else
+#define _STDINT_NO_INT8_T
+#define _STDINT_NO_INT32_T
+#endif
+#endif
+
+/*
+ * quote from SunOS-5.8 sys/inttypes.h:
+ * Use at your own risk.  As of February 1996, the committee is squarely
+ * behind the fixed sized types; the "least" and "fast" types are still being
+ * discussed.  The probability that the "fast" types may be removed before
+ * the standard is finalized is high enough that they are not currently
+ * implemented.
+ */
+
+#if defined _STDINT_NEED_INT_LEAST_T
+typedef  int8_t    int_least8_t;
+typedef  int16_t   int_least16_t;
+typedef  int32_t   int_least32_t;
+#ifdef _HAVE_UINT64_T
+typedef  int64_t   int_least64_t;
+#endif
+
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+#ifdef _HAVE_UINT64_T
+typedef uint64_t  uint_least64_t;
+#endif
+  /* least types */
+#endif
+
+#if defined _STDINT_NEED_INT_FAST_T
+typedef  int8_t    int_fast8_t;
+typedef  int       int_fast16_t;
+typedef  int32_t   int_fast32_t;
+#ifdef _HAVE_UINT64_T
+typedef  int64_t   int_fast64_t;
+#endif
+
+typedef uint8_t   uint_fast8_t;
+typedef unsigned  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+#ifdef _HAVE_UINT64_T
+typedef uint64_t  uint_fast64_t;
+#endif
+  /* fast types */
+#endif
+
+#ifdef _STDINT_NEED_INTMAX_T
+#ifdef _HAVE_UINT64_T
+typedef  int64_t       intmax_t;
+typedef uint64_t      uintmax_t;
+#else
+typedef          long  intmax_t;
+typedef unsigned long uintmax_t;
+#endif
+#endif
+
+#ifdef _STDINT_NEED_INTPTR_T
+#ifndef __intptr_t_defined
+#define __intptr_t_defined
+/* we encourage using "long" to store pointer values, never use "int" ! */
+#if   _STDINT_LONG_MODEL+0 == 242 || _STDINT_LONG_MODEL+0 == 484
+typedef  unsigned int   uintptr_t;
+typedef           int    intptr_t;
+#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL+0 == 444
+typedef  unsigned long  uintptr_t;
+typedef           long   intptr_t;
+#elif _STDINT_LONG_MODEL+0 == 448 && defined _HAVE_UINT64_T
+typedef        uint64_t uintptr_t;
+typedef         int64_t  intptr_t;
+#else /* matches typical system types ILP32 and LP64 - but not IP16 or LLP64 */
+typedef  unsigned long  uintptr_t;
+typedef           long   intptr_t;
+#endif
+#endif
+#endif
+
+/* The ISO C99 standard specifies that in C++ implementations these
+   should only be defined if explicitly requested.  */
+#if !defined __cplusplus || defined __STDC_CONSTANT_MACROS
+#ifndef UINT32_C
+
+/* Signed.  */
+# define INT8_C(c)      c
+# define INT16_C(c)     c
+# define INT32_C(c)     c
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define INT64_C(c)    c ## L
+# else
+#  define INT64_C(c)    c ## LL
+# endif
+
+/* Unsigned.  */
+# define UINT8_C(c)     c ## U
+# define UINT16_C(c)    c ## U
+# define UINT32_C(c)    c ## U
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define UINT64_C(c)   c ## UL
+# else
+#  define UINT64_C(c)   c ## ULL
+# endif
+
+/* Maximal type.  */
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define INTMAX_C(c)   c ## L
+#  define UINTMAX_C(c)  c ## UL
+# else
+#  define INTMAX_C(c)   c ## LL
+#  define UINTMAX_C(c)  c ## ULL
+# endif
+
+  /* literalnumbers */
+#endif
+#endif
+
+/* These limits are merily those of a two complement byte-oriented system */
+
+/* Minimum of signed integral types.  */
+# define INT8_MIN               (-128)
+# define INT16_MIN              (-32767-1)
+# define INT32_MIN              (-2147483647-1)
+# define INT64_MIN              (-__INT64_C(9223372036854775807)-1)
+/* Maximum of signed integral types.  */
+# define INT8_MAX               (127)
+# define INT16_MAX              (32767)
+# define INT32_MAX              (2147483647)
+# define INT64_MAX              (__INT64_C(9223372036854775807))
+
+/* Maximum of unsigned integral types.  */
+# define UINT8_MAX              (255)
+# define UINT16_MAX             (65535)
+# define UINT32_MAX             (4294967295U)
+# define UINT64_MAX             (__UINT64_C(18446744073709551615))
+
+/* Minimum of signed integral types having a minimum size.  */
+# define INT_LEAST8_MIN         INT8_MIN
+# define INT_LEAST16_MIN        INT16_MIN
+# define INT_LEAST32_MIN        INT32_MIN
+# define INT_LEAST64_MIN        INT64_MIN
+/* Maximum of signed integral types having a minimum size.  */
+# define INT_LEAST8_MAX         INT8_MAX
+# define INT_LEAST16_MAX        INT16_MAX
+# define INT_LEAST32_MAX        INT32_MAX
+# define INT_LEAST64_MAX        INT64_MAX
+
+/* Maximum of unsigned integral types having a minimum size.  */
+# define UINT_LEAST8_MAX        UINT8_MAX
+# define UINT_LEAST16_MAX       UINT16_MAX
+# define UINT_LEAST32_MAX       UINT32_MAX
+# define UINT_LEAST64_MAX       UINT64_MAX
+
+  /* shortcircuit*/
+#endif
+  /* once */
+#endif
+#endif
+STDINT_EOF
+fi
+    if cmp -s $ac_stdint_h $ac_stdint 2>/dev/null; then
+      AC_MSG_NOTICE([$ac_stdint_h is unchanged])
+    else
+      ac_dir=`AS_DIRNAME(["$ac_stdint_h"])`
+      AS_MKDIR_P(["$ac_dir"])
+      rm -f $ac_stdint_h
+      mv $ac_stdint $ac_stdint_h
+    fi
+],[# variables for create stdint.h replacement
+PACKAGE="$PACKAGE"
+VERSION="$VERSION"
+ac_stdint_h="$ac_stdint_h"
+_ac_stdint_h=AS_TR_CPP(_$PACKAGE-$ac_stdint_h)
+ac_cv_stdint_message="$ac_cv_stdint_message"
+ac_cv_header_stdint_t="$ac_cv_header_stdint_t"
+ac_cv_header_stdint_x="$ac_cv_header_stdint_x"
+ac_cv_header_stdint_o="$ac_cv_header_stdint_o"
+ac_cv_header_stdint_u="$ac_cv_header_stdint_u"
+ac_cv_type_uint64_t="$ac_cv_type_uint64_t"
+ac_cv_type_u_int64_t="$ac_cv_type_u_int64_t"
+ac_cv_char_data_model="$ac_cv_char_data_model"
+ac_cv_long_data_model="$ac_cv_long_data_model"
+ac_cv_type_int_least32_t="$ac_cv_type_int_least32_t"
+ac_cv_type_int_fast32_t="$ac_cv_type_int_fast32_t"
+ac_cv_type_intmax_t="$ac_cv_type_intmax_t"
+])
+])
diff --git a/nestegg/m4/pkg.m4 b/nestegg/m4/pkg.m4

new file mode 100644 (file)

index 0000000..996e294
--- /dev/null
+++ b/nestegg/m4/pkg.m4
@@ -0,0 +1,157 @@
+# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
+#
+# Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# PKG_PROG_PKG_CONFIG([MIN-VERSION])
+# ----------------------------------
+# Locate the pkg-config tool with AC_PATH_TOOL, unless the user already set
+# PKG_CONFIG in the environment, and leave its path in PKG_CONFIG.  When the
+# tool is older than MIN-VERSION (0.9.0 if omitted), PKG_CONFIG is reset to
+# the empty string, which the other macros in this file treat as "no
+# pkg-config available".
+AC_DEFUN([PKG_PROG_PKG_CONFIG],
+[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
+m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+       AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
+fi
+if test -n "$PKG_CONFIG"; then
+       _pkg_min_version=m4_default([$1], [0.9.0])
+       AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
+       if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+               AC_MSG_RESULT([yes])
+       else
+               AC_MSG_RESULT([no])
+               PKG_CONFIG=""
+       fi
+
+fi[]dnl
+])# PKG_PROG_PKG_CONFIG
+
+# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+# Check to see whether a particular set of modules exists.  Similar
+# to PKG_CHECK_MODULES(), but does not set variables or print errors.
+#
+# Runs "pkg-config --exists --print-errors MODULES" when PKG_CONFIG is
+# non-empty.  ACTION-IF-FOUND defaults to the shell no-op ":"; the else
+# branch is only emitted when ACTION-IF-NOT-FOUND is given.
+#
+# Similar to PKG_CHECK_MODULES, make sure that the first instance of
+# this or PKG_CHECK_MODULES is called, or make sure to call
+# PKG_PROG_PKG_CONFIG manually
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_EXISTS],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+if test -n "$PKG_CONFIG" && \
+    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
+  m4_ifval([$2], [$2], [:])
+m4_ifvaln([$3], [else
+  $3])dnl
+fi])
+
+
+# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
+# ---------------------------------------------
+# Internal helper that fills the shell variable pkg_cv_VARIABLE.
+# A non-empty VARIABLE already present in the shell takes precedence;
+# otherwise the value is the output of "pkg-config --COMMAND MODULES".
+# Sets pkg_failed=yes when MODULES do not exist and pkg_failed=untried
+# when PKG_CONFIG is empty.
+m4_define([_PKG_CONFIG],
+[if test -n "$PKG_CONFIG"; then
+    if test -n "$$1"; then
+        pkg_cv_[]$1="$$1"
+    else
+        PKG_CHECK_EXISTS([$3],
+                         [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
+                        [pkg_failed=yes])
+    fi
+else
+       pkg_failed=untried
+fi[]dnl
+])# _PKG_CONFIG
+
+# _PKG_SHORT_ERRORS_SUPPORTED
+# -----------------------------
+# Internal helper: sets the shell variable _pkg_short_errors_supported to
+# "yes" when the located pkg-config is at least version 0.20 and to "no"
+# otherwise.  PKG_CHECK_MODULES uses the result to decide whether to pass
+# --short-errors.
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi[]dnl
+])# _PKG_SHORT_ERRORS_SUPPORTED
+
+
+# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
+# [ACTION-IF-NOT-FOUND])
+#
+# Query pkg-config for the compiler and linker flags of MODULES and store
+# them in VARIABLE-PREFIX_CFLAGS and VARIABLE-PREFIX_LIBS.  Values for those
+# two variables that are already set by the user are kept as they are.
+#
+# If MODULES are missing, the pkg-config diagnostics are stored in
+# VARIABLE-PREFIX_PKG_ERRORS and written to config.log; then
+# ACTION-IF-NOT-FOUND runs, or configure stops with an error when no such
+# action was given.  If pkg-config itself is unavailable,
+# ACTION-IF-NOT-FOUND runs, or configure stops with a failure.  On success
+# ACTION-IF-FOUND runs (default: the shell no-op ":").
+#
+# Note that if there is a possibility the first call to
+# PKG_CHECK_MODULES might not happen, you should be sure to include an
+# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
+#
+#
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_MODULES],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
+AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
+
+pkg_failed=no
+AC_MSG_CHECKING([for $1])
+
+_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
+_PKG_CONFIG([$1][_LIBS], [libs], [$2])
+
+m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
+and $1[]_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.])
+
+if test $pkg_failed = yes; then
+        _PKG_SHORT_ERRORS_SUPPORTED
+        if test $_pkg_short_errors_supported = yes; then
+               $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"`
+        else
+               $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"`
+        fi
+       # Put the nasty error message in config.log where it belongs
+       echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+
+       ifelse([$4], , [AC_MSG_ERROR(dnl
+[Package requirements ($2) were not met:
+
+$$1_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+_PKG_TEXT
+])],
+               [AC_MSG_RESULT([no])
+                $4])
+elif test $pkg_failed = untried; then
+       ifelse([$4], , [AC_MSG_FAILURE(dnl
+[The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+_PKG_TEXT
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.])],
+               [$4])
+else
+       $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+       $1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        AC_MSG_RESULT([yes])
+       ifelse([$3], , :, [$3])
+fi[]dnl
+])# PKG_CHECK_MODULES
diff --git a/nestegg/nestegg-uninstalled.pc.in b/nestegg/nestegg-uninstalled.pc.in

new file mode 100644 (file)

index 0000000..19bb680
--- /dev/null
+++ b/nestegg/nestegg-uninstalled.pc.in
@@ -0,0 +1,13 @@
+# nestegg uninstalled pkg-config file
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: nestegg
+Description: WebM/Matroska demuxer
+Version: @VERSION@
+Conflicts:
+Libs: -L${libdir} -lnestegg
+Cflags: -I${includedir}
diff --git a/nestegg/nestegg.pc.in b/nestegg/nestegg.pc.in

new file mode 100644 (file)

index 0000000..32c09d7
--- /dev/null
+++ b/nestegg/nestegg.pc.in
@@ -0,0 +1,13 @@
+# nestegg installed pkg-config file
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: nestegg
+Description: WebM/Matroska demuxer
+Version: @VERSION@
+Conflicts:
+Libs: -L${libdir} -lnestegg
+Cflags: -I${includedir}
diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c

new file mode 100644 (file)

index 0000000..63a0e83
--- /dev/null
+++ b/nestegg/src/nestegg.c
@@ -0,0 +1,1938 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nestegg/halloc/halloc.h"
+#include "nestegg/include/nestegg/nestegg.h"
+
+/* EBML Elements */
+#define ID_EBML                 0x1a45dfa3
+#define ID_EBML_VERSION         0x4286
+#define ID_EBML_READ_VERSION    0x42f7
+#define ID_EBML_MAX_ID_LENGTH   0x42f2
+#define ID_EBML_MAX_SIZE_LENGTH 0x42f3
+#define ID_DOCTYPE              0x4282
+#define ID_DOCTYPE_VERSION      0x4287
+#define ID_DOCTYPE_READ_VERSION 0x4285
+
+/* Global Elements */
+#define ID_VOID                 0xec
+#define ID_CRC32                0xbf
+
+/* WebMedia Elements */
+#define ID_SEGMENT              0x18538067
+
+/* Seek Head Elements */
+#define ID_SEEK_HEAD            0x114d9b74
+#define ID_SEEK                 0x4dbb
+#define ID_SEEK_ID              0x53ab
+#define ID_SEEK_POSITION        0x53ac
+
+/* Info Elements */
+#define ID_INFO                 0x1549a966
+#define ID_TIMECODE_SCALE       0x2ad7b1
+#define ID_DURATION             0x4489
+
+/* Cluster Elements */
+#define ID_CLUSTER              0x1f43b675
+#define ID_TIMECODE             0xe7
+#define ID_BLOCK_GROUP          0xa0
+#define ID_SIMPLE_BLOCK         0xa3
+
+/* BlockGroup Elements */
+#define ID_BLOCK                0xa1
+#define ID_BLOCK_DURATION       0x9b
+#define ID_REFERENCE_BLOCK      0xfb
+
+/* Tracks Elements */
+#define ID_TRACKS               0x1654ae6b
+#define ID_TRACK_ENTRY          0xae
+#define ID_TRACK_NUMBER         0xd7
+#define ID_TRACK_UID            0x73c5
+#define ID_TRACK_TYPE           0x83
+#define ID_FLAG_ENABLED         0xb9
+#define ID_FLAG_DEFAULT         0x88
+#define ID_FLAG_LACING          0x9c
+#define ID_TRACK_TIMECODE_SCALE 0x23314f
+#define ID_LANGUAGE             0x22b59c
+#define ID_CODEC_ID             0x86
+#define ID_CODEC_PRIVATE        0x63a2
+
+/* Video Elements */
+#define ID_VIDEO                0xe0
+#define ID_PIXEL_WIDTH          0xb0
+#define ID_PIXEL_HEIGHT         0xba
+#define ID_PIXEL_CROP_BOTTOM    0x54aa
+#define ID_PIXEL_CROP_TOP       0x54bb
+#define ID_PIXEL_CROP_LEFT      0x54cc
+#define ID_PIXEL_CROP_RIGHT     0x54dd
+#define ID_DISPLAY_WIDTH        0x54b0
+#define ID_DISPLAY_HEIGHT       0x54ba
+
+/* Audio Elements */
+#define ID_AUDIO                0xe1
+#define ID_SAMPLING_FREQUENCY   0xb5
+#define ID_CHANNELS             0x9f
+#define ID_BIT_DEPTH            0x6264
+
+/* Cues Elements */
+#define ID_CUES                 0x1c53bb6b
+#define ID_CUE_POINT            0xbb
+#define ID_CUE_TIME             0xb3
+#define ID_CUE_TRACK_POSITIONS  0xb7
+#define ID_CUE_TRACK            0xf7
+#define ID_CUE_CLUSTER_POSITION 0xf1
+#define ID_CUE_BLOCK_NUMBER     0x5378
+
+/* EBML Types */
+/* Tag stored in ebml_type.type and ebml_element_desc.type.  ne_read_simple
+   switches on it to pick the payload reader.  TYPE_UNKNOWN is the first
+   enumerator, so zero-filled storage carries it. */
+enum ebml_type_enum {
+  TYPE_UNKNOWN,
+  TYPE_MASTER,
+  TYPE_UINT,
+  TYPE_FLOAT,
+  TYPE_INT,
+  TYPE_STRING,
+  TYPE_BINARY
+};
+
+#define LIMIT_STRING            (1 << 20)
+#define LIMIT_BINARY            (1 << 24)
+#define LIMIT_BLOCK             (1 << 30)
+#define LIMIT_FRAME             (1 << 28)
+
+/* Field Flags */
+#define DESC_FLAG_NONE          0
+#define DESC_FLAG_MULTI         (1 << 0)
+#define DESC_FLAG_SUSPEND       (1 << 1)
+#define DESC_FLAG_OFFSET        (1 << 2)
+
+/* Block Header Flags */
+#define BLOCK_FLAGS_LACING      6
+
+/* Lacing Constants */
+#define LACING_NONE             0
+#define LACING_XIPH             1
+#define LACING_FIXED            2
+#define LACING_EBML             3
+
+/* Track Types */
+#define TRACK_TYPE_VIDEO        1
+#define TRACK_TYPE_AUDIO        2
+
+/* Track IDs */
+#define TRACK_ID_VP8            "V_VP8"
+#define TRACK_ID_VORBIS         "A_VORBIS"
+
+/* Selects how ne_bare_read_vint treats the length marker bit of the first
+   byte: MASK_NONE keeps it in the value (used for element IDs),
+   MASK_FIRST_BIT clears it (used for sizes and lacing values). */
+enum vint_mask {
+  MASK_NONE,
+  MASK_FIRST_BIT
+};
+
+/* Payload of a TYPE_BINARY element.  ne_read_binary points data at a buffer
+   of length bytes taken from the context's allocation pool. */
+struct ebml_binary {
+  unsigned char * data;
+  size_t length;
+};
+
+/* One occurrence of a repeatable master element.  ne_read_master fills id
+   with the element ID and points data at a zero-filled block whose size is
+   the descriptor's size field. */
+struct ebml_list_node {
+  struct ebml_list_node * next;
+  uint64_t id;
+  void * data;
+};
+
+/* Singly linked list of ebml_list_node.  ne_read_master appends new nodes
+   at tail; both pointers are NULL while the list is empty. */
+struct ebml_list {
+  struct ebml_list_node * head;
+  struct ebml_list_node * tail;
+};
+
+/* Storage for one simple (non-master) element value. */
+struct ebml_type {
+  /* the member that is valid is selected by type below */
+  union ebml_value {
+    uint64_t u;
+    double f;
+    int64_t i;
+    char * s;
+    struct ebml_binary b;
+  } v;
+  enum ebml_type_enum type;
+  /* set to 1 by ne_read_simple once a value was read successfully; the
+     ne_get_* accessors fail while it is 0 */
+  int read;
+};
+
+/* EBML Definitions */
+struct ebml {
+  struct ebml_type ebml_version;
+  struct ebml_type ebml_read_version;
+  struct ebml_type ebml_max_id_length;
+  struct ebml_type ebml_max_size_length;
+  struct ebml_type doctype;
+  struct ebml_type doctype_version;
+  struct ebml_type doctype_read_version;
+};
+
+/* Matroska Definitions */
+struct seek {
+  struct ebml_type id;
+  struct ebml_type position;
+};
+
+struct seek_head {
+  struct ebml_list seek;
+};
+
+struct info {
+  struct ebml_type timecode_scale;
+  struct ebml_type duration;
+};
+
+struct block_group {
+  struct ebml_type duration;
+  struct ebml_type reference_block;
+};
+
+struct cluster {
+  struct ebml_type timecode;
+  struct ebml_list block_group;
+};
+
+struct video {
+  struct ebml_type pixel_width;
+  struct ebml_type pixel_height;
+  struct ebml_type pixel_crop_bottom;
+  struct ebml_type pixel_crop_top;
+  struct ebml_type pixel_crop_left;
+  struct ebml_type pixel_crop_right;
+  struct ebml_type display_width;
+  struct ebml_type display_height;
+};
+
+struct audio {
+  struct ebml_type sampling_frequency;
+  struct ebml_type channels;
+  struct ebml_type bit_depth;
+};
+
+struct track_entry {
+  struct ebml_type number;
+  struct ebml_type uid;
+  struct ebml_type type;
+  struct ebml_type flag_enabled;
+  struct ebml_type flag_default;
+  struct ebml_type flag_lacing;
+  struct ebml_type track_timecode_scale;
+  struct ebml_type language;
+  struct ebml_type codec_id;
+  struct ebml_type codec_private;
+  struct video video;
+  struct audio audio;
+};
+
+struct tracks {
+  struct ebml_list track_entry;
+};
+
+struct cue_track_positions {
+  struct ebml_type track;
+  struct ebml_type cluster_position;
+  struct ebml_type block_number;
+};
+
+struct cue_point {
+  struct ebml_type time;
+  struct ebml_list cue_track_positions;
+};
+
+struct cues {
+  struct ebml_list cue_point;
+};
+
+struct segment {
+  struct ebml_list seek_head;
+  struct info info;
+  struct ebml_list cluster;
+  struct tracks tracks;
+  struct cues cues;
+};
+
+/* Misc. */
+/* Placeholder object whose only purpose is to be allocated with h_malloc
+   and used as the parent that ne_pool_alloc attaches blocks to. */
+struct pool_ctx {
+  char dummy;
+};
+
+/* One level of the parser's ancestor stack (see ne_ctx_push/ne_ctx_pop). */
+struct list_node {
+  struct list_node * previous;     /* enclosing level, NULL at the bottom */
+  struct ebml_element_desc * node; /* descriptor table of this level's children */
+  unsigned char * data;            /* base address that descriptor offsets apply to */
+};
+
+/* Snapshot of the parser position taken by ne_ctx_save and reinstated by
+   ne_ctx_restore: the stream offset, the top of the ancestor stack and the
+   cached element header. */
+struct saved_state {
+  int64_t stream_offset;
+  struct list_node * ancestor;
+  uint64_t last_id;
+  uint64_t last_size;
+};
+
+/* A buffer of length bytes linked to a following frame through next.
+   NOTE(review): presumably one laced frame of a packet (see
+   nestegg_packet.frame); the code that fills it is not in this part of the
+   file -- confirm. */
+struct frame {
+  unsigned char * data;
+  size_t length;
+  struct frame * next;
+};
+
+/* Public (opaque) Structures */
+/* Demuxer context. */
+struct nestegg {
+  /* callbacks (read, seek, tell) and userdata used by the ne_io_* helpers */
+  nestegg_io * io;
+  /* logging callback, invoked as log(ctx, severity, format, ...) */
+  nestegg_log log;
+  /* parent for every block obtained through ne_pool_alloc */
+  struct pool_ctx * alloc_pool;
+  /* element header cached by ne_peek_element; ne_read_element zeroes both */
+  uint64_t last_id;
+  uint64_t last_size;
+  /* top of the parser's ancestor stack, NULL when the stack is empty */
+  struct list_node * ancestor;
+  /* parsed data; targets of the offsets in ne_top_level_elements */
+  struct ebml ebml;
+  struct segment segment;
+  /* stream position recorded by ne_parse right after the Segment element
+     header was consumed (DESC_FLAG_OFFSET) */
+  int64_t segment_offset;
+  /* NOTE(review): maintained outside this part of the file */
+  unsigned int track_count;
+};
+
+/* Packet handed to API users: a track identifier, a timecode and a linked
+   list of frames.  NOTE(review): units and numbering of track/timecode are
+   defined by code outside this part of the file -- confirm there. */
+struct nestegg_packet {
+  uint64_t track;
+  uint64_t timecode;
+  struct frame * frame;
+};
+
+/* Element Descriptor */
+/* One row of a table describing the children a master element may contain.
+   Tables end with a row whose id is 0 (E_LAST). */
+struct ebml_element_desc {
+  char const * name;                   /* stringified ID macro, for logging */
+  uint64_t id;                         /* element ID, marker bit included */
+  enum ebml_type_enum type;            /* payload type */
+  size_t offset;                       /* storage offset inside the parent's data */
+  unsigned int flags;                  /* DESC_FLAG_* bits */
+  struct ebml_element_desc * children; /* child table, masters only */
+  size_t size;                         /* bytes per list node, DESC_FLAG_MULTI only */
+  size_t data_offset;                  /* where to store the stream position,
+                                          DESC_FLAG_OFFSET only */
+};
+
+/* Initializers for ebml_element_desc rows.
+   E_FIELD:           simple value stored in STRUCT.FIELD (a struct ebml_type).
+   E_MASTER:          repeatable master; STRUCT.FIELD is an ebml_list and each
+                      occurrence gets a node of sizeof(struct FIELD) bytes.
+   E_SINGLE_MASTER:   master stored in place at STRUCT.FIELD.
+   E_SINGLE_MASTER_O: like E_SINGLE_MASTER, additionally records the stream
+                      position in STRUCT.FIELD_offset.
+   E_SUSPEND:         element at which ne_parse stops without consuming it.
+   E_LAST:            table terminator (id 0).
+   The master variants take their child table from ne_FIELD_elements. */
+#define E_FIELD(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, NULL, 0, 0 }
+#define E_MASTER(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_MULTI, ne_ ## FIELD ## _elements, \
+      sizeof(struct FIELD), 0 }
+#define E_SINGLE_MASTER_O(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_OFFSET, ne_ ## FIELD ## _elements, 0, \
+      offsetof(STRUCT, FIELD ## _offset) }
+#define E_SINGLE_MASTER(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, ne_ ## FIELD ## _elements, 0, 0 }
+#define E_SUSPEND(ID, TYPE) \
+  { #ID, ID, TYPE, 0, DESC_FLAG_SUSPEND, NULL, 0, 0 }
+#define E_LAST \
+  { NULL, 0, 0, 0, DESC_FLAG_NONE, NULL, 0, 0 }
+
+/* EBML Element Lists */
+static struct ebml_element_desc ne_ebml_elements[] = {
+  E_FIELD(ID_EBML_VERSION, TYPE_UINT, struct ebml, ebml_version),
+  E_FIELD(ID_EBML_READ_VERSION, TYPE_UINT, struct ebml, ebml_read_version),
+  E_FIELD(ID_EBML_MAX_ID_LENGTH, TYPE_UINT, struct ebml, ebml_max_id_length),
+  E_FIELD(ID_EBML_MAX_SIZE_LENGTH, TYPE_UINT, struct ebml, ebml_max_size_length),
+  E_FIELD(ID_DOCTYPE, TYPE_STRING, struct ebml, doctype),
+  E_FIELD(ID_DOCTYPE_VERSION, TYPE_UINT, struct ebml, doctype_version),
+  E_FIELD(ID_DOCTYPE_READ_VERSION, TYPE_UINT, struct ebml, doctype_read_version),
+  E_LAST
+};
+
+/* WebMedia Element Lists */
+static struct ebml_element_desc ne_seek_elements[] = {
+  E_FIELD(ID_SEEK_ID, TYPE_BINARY, struct seek, id),
+  E_FIELD(ID_SEEK_POSITION, TYPE_UINT, struct seek, position),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_seek_head_elements[] = {
+  E_MASTER(ID_SEEK, TYPE_MASTER, struct seek_head, seek),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_info_elements[] = {
+  E_FIELD(ID_TIMECODE_SCALE, TYPE_UINT, struct info, timecode_scale),
+  E_FIELD(ID_DURATION, TYPE_FLOAT, struct info, duration),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_block_group_elements[] = {
+  E_SUSPEND(ID_BLOCK, TYPE_BINARY),
+  E_FIELD(ID_BLOCK_DURATION, TYPE_UINT, struct block_group, duration),
+  E_FIELD(ID_REFERENCE_BLOCK, TYPE_INT, struct block_group, reference_block),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cluster_elements[] = {
+  E_FIELD(ID_TIMECODE, TYPE_UINT, struct cluster, timecode),
+  E_MASTER(ID_BLOCK_GROUP, TYPE_MASTER, struct cluster, block_group),
+  E_SUSPEND(ID_SIMPLE_BLOCK, TYPE_BINARY),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_video_elements[] = {
+  E_FIELD(ID_PIXEL_WIDTH, TYPE_UINT, struct video, pixel_width),
+  E_FIELD(ID_PIXEL_HEIGHT, TYPE_UINT, struct video, pixel_height),
+  E_FIELD(ID_PIXEL_CROP_BOTTOM, TYPE_UINT, struct video, pixel_crop_bottom),
+  E_FIELD(ID_PIXEL_CROP_TOP, TYPE_UINT, struct video, pixel_crop_top),
+  E_FIELD(ID_PIXEL_CROP_LEFT, TYPE_UINT, struct video, pixel_crop_left),
+  E_FIELD(ID_PIXEL_CROP_RIGHT, TYPE_UINT, struct video, pixel_crop_right),
+  E_FIELD(ID_DISPLAY_WIDTH, TYPE_UINT, struct video, display_width),
+  E_FIELD(ID_DISPLAY_HEIGHT, TYPE_UINT, struct video, display_height),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_audio_elements[] = {
+  E_FIELD(ID_SAMPLING_FREQUENCY, TYPE_FLOAT, struct audio, sampling_frequency),
+  E_FIELD(ID_CHANNELS, TYPE_UINT, struct audio, channels),
+  E_FIELD(ID_BIT_DEPTH, TYPE_UINT, struct audio, bit_depth),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_track_entry_elements[] = {
+  E_FIELD(ID_TRACK_NUMBER, TYPE_UINT, struct track_entry, number),
+  E_FIELD(ID_TRACK_UID, TYPE_UINT, struct track_entry, uid),
+  E_FIELD(ID_TRACK_TYPE, TYPE_UINT, struct track_entry, type),
+  E_FIELD(ID_FLAG_ENABLED, TYPE_UINT, struct track_entry, flag_enabled),
+  E_FIELD(ID_FLAG_DEFAULT, TYPE_UINT, struct track_entry, flag_default),
+  E_FIELD(ID_FLAG_LACING, TYPE_UINT, struct track_entry, flag_lacing),
+  E_FIELD(ID_TRACK_TIMECODE_SCALE, TYPE_FLOAT, struct track_entry, track_timecode_scale),
+  E_FIELD(ID_LANGUAGE, TYPE_STRING, struct track_entry, language),
+  E_FIELD(ID_CODEC_ID, TYPE_STRING, struct track_entry, codec_id),
+  E_FIELD(ID_CODEC_PRIVATE, TYPE_BINARY, struct track_entry, codec_private),
+  E_SINGLE_MASTER(ID_VIDEO, TYPE_MASTER, struct track_entry, video),
+  E_SINGLE_MASTER(ID_AUDIO, TYPE_MASTER, struct track_entry, audio),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_tracks_elements[] = {
+  E_MASTER(ID_TRACK_ENTRY, TYPE_MASTER, struct tracks, track_entry),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cue_track_positions_elements[] = {
+  E_FIELD(ID_CUE_TRACK, TYPE_UINT, struct cue_track_positions, track),
+  E_FIELD(ID_CUE_CLUSTER_POSITION, TYPE_UINT, struct cue_track_positions, cluster_position),
+  E_FIELD(ID_CUE_BLOCK_NUMBER, TYPE_UINT, struct cue_track_positions, block_number),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cue_point_elements[] = {
+  E_FIELD(ID_CUE_TIME, TYPE_UINT, struct cue_point, time),
+  E_MASTER(ID_CUE_TRACK_POSITIONS, TYPE_MASTER, struct cue_point, cue_track_positions),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cues_elements[] = {
+  E_MASTER(ID_CUE_POINT, TYPE_MASTER, struct cues, cue_point),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_segment_elements[] = {
+  E_MASTER(ID_SEEK_HEAD, TYPE_MASTER, struct segment, seek_head),
+  E_SINGLE_MASTER(ID_INFO, TYPE_MASTER, struct segment, info),
+  E_MASTER(ID_CLUSTER, TYPE_MASTER, struct segment, cluster),
+  E_SINGLE_MASTER(ID_TRACKS, TYPE_MASTER, struct segment, tracks),
+  E_SINGLE_MASTER(ID_CUES, TYPE_MASTER, struct segment, cues),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_top_level_elements[] = {
+  E_SINGLE_MASTER(ID_EBML, TYPE_MASTER, nestegg, ebml),
+  E_SINGLE_MASTER_O(ID_SEGMENT, TYPE_MASTER, nestegg, segment),
+  E_LAST
+};
+
+#undef E_FIELD
+#undef E_MASTER
+#undef E_SINGLE_MASTER_O
+#undef E_SINGLE_MASTER
+#undef E_SUSPEND
+#undef E_LAST
+
+/* Allocate a pool_ctx with h_malloc, intended as the parent handed to
+   ne_pool_alloc.  Aborts the process if the allocation fails, so the
+   result is never NULL. */
+static struct pool_ctx *
+ne_pool_init(void)
+{
+  struct pool_ctx * root = h_malloc(sizeof(struct pool_ctx));
+
+  if (root == NULL)
+    abort();
+
+  return root;
+}
+
+/* Release POOL with h_free.
+   NOTE(review): halloc is expected to free every block attached to POOL as
+   well -- confirm against halloc.h. */
+static void
+ne_pool_destroy(struct pool_ctx * pool)
+{
+  h_free(pool);
+}
+
+/* Allocate SIZE bytes with h_malloc, attach the block to POOL with hattach
+   and zero-fill it.  Aborts the process if the allocation fails. */
+static void *
+ne_pool_alloc(size_t size, struct pool_ctx * pool)
+{
+  void * block = h_malloc(size);
+
+  if (block == NULL)
+    abort();
+
+  hattach(block, pool);
+  return memset(block, 0, size);
+}
+
+/* Return SIZE zero-filled bytes from calloc; the caller releases them with
+   free.  Aborts the process when calloc returns NULL. */
+static void *
+ne_alloc(size_t size)
+{
+  void * block = calloc(1, size);
+
+  if (block == NULL)
+    abort();
+
+  return block;
+}
+
+/* Forward a read of LENGTH bytes into BUFFER to the user's read callback and
+   return its result unchanged.  Callers in this file treat 1 as success and
+   any other value as a status to propagate. */
+static int
+ne_io_read(nestegg_io * io, void * buffer, size_t length)
+{
+  return io->read(buffer, length, io->userdata);
+}
+
+/* Forward a seek to the user's seek callback and return its result
+   unchanged.  ne_ctx_restore treats any non-zero result as failure. */
+static int
+ne_io_seek(nestegg_io * io, int64_t offset, int whence)
+{
+  return io->seek(offset, whence, io->userdata);
+}
+
+/* Discard LENGTH bytes from the stream by reading them into a scratch
+   buffer, at most 8192 bytes per read.  Returns 1 when everything was
+   consumed (also when LENGTH is 0), otherwise the status of the read that
+   failed. */
+static int
+ne_io_read_skip(nestegg_io * io, size_t length)
+{
+  unsigned char scratch[8192];
+  size_t chunk;
+  int status = 1;
+
+  for (; length > 0; length -= chunk) {
+    chunk = sizeof(scratch);
+    if (length < chunk)
+      chunk = length;
+    status = ne_io_read(io, scratch, chunk);
+    if (status != 1)
+      break;
+  }
+
+  return status;
+}
+
+/* Return the current stream position as reported by the user's tell
+   callback.  Callers in this file treat a negative result as failure. */
+static int64_t
+ne_io_tell(nestegg_io * io)
+{
+  return io->tell(io->userdata);
+}
+
+/* Read one EBML variable-length integer.  The position of the highest set
+   bit in the first byte gives the total width in bytes (1 to 8); a first
+   byte without any of its upper seven bits set is treated as width 8.  The
+   remaining bytes follow most significant first.
+
+   *VALUE receives the number, with the marker bit cleared when MASKFLAG is
+   MASK_FIRST_BIT and kept when it is MASK_NONE.  *LENGTH, if LENGTH is not
+   NULL, receives the width.  Returns 1 on success or the status of the read
+   that failed; in that case *VALUE may hold a partial result. */
+static int
+ne_bare_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length, enum vint_mask maskflag)
+{
+  unsigned char byte;
+  unsigned int marker = 0x80;
+  unsigned int octets = 1;
+  unsigned int i;
+  int r;
+
+  r = ne_io_read(io, &byte, 1);
+  if (r != 1)
+    return r;
+
+  /* walk the marker down until it hits a set bit or the width limit */
+  for (; octets < 8 && (byte & marker) == 0; ++octets)
+    marker >>= 1;
+
+  if (length)
+    *length = octets;
+
+  if (maskflag == MASK_FIRST_BIT)
+    *value = byte & ~marker;
+  else
+    *value = byte;
+
+  for (i = 1; i < octets; ++i) {
+    r = ne_io_read(io, &byte, 1);
+    if (r != 1)
+      return r;
+    *value = (*value << 8) | byte;
+  }
+
+  return 1;
+}
+
+/* Read an element ID: a variable-length integer whose marker bit is kept as
+   part of the value (MASK_NONE).  Arguments and result are those of
+   ne_bare_read_vint. */
+static int
+ne_read_id(nestegg_io * io, uint64_t * value, uint64_t * length)
+{
+  return ne_bare_read_vint(io, value, length, MASK_NONE);
+}
+
+/* Read an unsigned variable-length integer with the marker bit removed
+   (MASK_FIRST_BIT).  Arguments and result are those of ne_bare_read_vint. */
+static int
+ne_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length)
+{
+  return ne_bare_read_vint(io, value, length, MASK_FIRST_BIT);
+}
+
+/* Read a signed variable-length integer.  The encoded unsigned value is
+   shifted into the signed range by subtracting a bias that depends on the
+   encoded width.  *LENGTH, if LENGTH is not NULL, receives the width.
+   Returns 1 on success or the failing I/O status, in which case neither
+   output is written. */
+static int
+ne_read_svint(nestegg_io * io, int64_t * value, uint64_t * length)
+{
+  /* indexed by width - 1; entry is 2^(7 * width - 1) - 1 */
+  static const int64_t bias[8] = {
+    0x3f, 0x1fff,
+    0xfffff, 0x7ffffff,
+    0x3ffffffffLL, 0x1ffffffffffLL,
+    0xffffffffffffLL, 0x7fffffffffffffLL
+  };
+  uint64_t raw;
+  uint64_t width;
+  int r = ne_bare_read_vint(io, &raw, &width, MASK_FIRST_BIT);
+
+  if (r == 1) {
+    *value = raw - bias[width - 1];
+    if (length)
+      *length = width;
+  }
+
+  return r;
+}
+
+/* Read a LENGTH byte unsigned integer, most significant byte first, into
+   *VAL.  LENGTH must be 1 to 8, otherwise -1 is returned without touching
+   the stream.  Returns 1 on success or the status of the read that failed;
+   after a failure past the first byte *VAL holds a partial value. */
+static int
+ne_read_uint(nestegg_io * io, uint64_t * val, uint64_t length)
+{
+  unsigned char byte;
+  uint64_t remaining;
+  int r;
+
+  if (length < 1 || length > 8)
+    return -1;
+
+  r = ne_io_read(io, &byte, 1);
+  if (r != 1)
+    return r;
+  *val = byte;
+
+  for (remaining = length - 1; remaining > 0; --remaining) {
+    r = ne_io_read(io, &byte, 1);
+    if (r != 1)
+      return r;
+    *val = (*val << 8) | byte;
+  }
+
+  return 1;
+}
+
+/* Read a LENGTH byte two's complement integer, most significant byte first,
+   into *VAL.  Values narrower than 8 bytes are sign extended.  Returns 1 on
+   success or the ne_read_uint status, in which case *VAL is not written. */
+static int
+ne_read_int(nestegg_io * io, int64_t * val, uint64_t length)
+{
+  uint64_t raw;
+  uint64_t sign_bit;
+  int r = ne_read_uint(io, &raw, length);
+
+  if (r != 1)
+    return r;
+
+  if (length >= sizeof(int64_t)) {
+    *val = (int64_t) raw;
+    return 1;
+  }
+
+  /* top bit of the encoded width decides the sign */
+  sign_bit = 1;
+  sign_bit <<= length * 8 - 1;
+  if (raw >= sign_bit)
+    *val = raw - (sign_bit << 1);
+  else
+    *val = raw;
+
+  return 1;
+}
+
+/* Read a floating point payload of LENGTH bytes, most significant byte
+   first, into *VAL.  Only 4 byte (single) and 8 byte (double) encodings are
+   supported; any other LENGTH yields -1.  Otherwise returns 1 on success or
+   the ne_read_uint status, in which case *VAL is not written.
+
+   The 4 byte case is reinterpreted through a 32 bit integer so the result
+   does not depend on which half of a 64 bit word a float overlays; reading
+   a float out of a union with uint64_t only works on little-endian hosts.
+   Assumes float/double are IEEE 754 binary32/binary64 and share the byte
+   order of integers -- TODO confirm for exotic targets. */
+static int
+ne_read_float(nestegg_io * io, double * val, uint64_t length)
+{
+  union {
+    uint32_t u;
+    float f;
+  } single;
+  union {
+    uint64_t u;
+    double d;
+  } wide;
+  uint64_t bits;
+  int r;
+
+  /* length == 10 not implemented */
+  if (length != 4 && length != 8)
+    return -1;
+  r = ne_read_uint(io, &bits, length);
+  if (r != 1)
+    return r;
+  if (length == 4) {
+    single.u = (uint32_t) bits;
+    *val = single.f;
+  } else {
+    wide.u = bits;
+    *val = wide.d;
+  }
+  return 1;
+}
+
+/* Read a LENGTH byte string element into a pool-allocated, NUL-terminated
+   buffer and store it in *VAL.  Empty strings and strings longer than
+   LIMIT_STRING are rejected with -1.  Returns 1 on success or the failing
+   I/O status, in which case *VAL is not written. */
+static int
+ne_read_string(nestegg * ctx, char ** val, uint64_t length)
+{
+  char * text;
+  int r;
+
+  if (length < 1 || length > LIMIT_STRING)
+    return -1;
+
+  text = ne_pool_alloc(length + 1, ctx->alloc_pool);
+  r = ne_io_read(ctx->io, (unsigned char *) text, length);
+  if (r == 1) {
+    text[length] = '\0';
+    *val = text;
+  }
+
+  return r;
+}
+
+/* Read a LENGTH byte binary element into a pool-allocated buffer described
+   by *VAL.  Empty payloads and payloads larger than LIMIT_BINARY are
+   rejected with -1.  Returns the status of the read; VAL's fields are set
+   before the read and stay set if it fails. */
+static int
+ne_read_binary(nestegg * ctx, struct ebml_binary * val, uint64_t length)
+{
+  if (length < 1 || length > LIMIT_BINARY)
+    return -1;
+
+  val->length = length;
+  val->data = ne_pool_alloc(length, ctx->alloc_pool);
+
+  return ne_io_read(ctx->io, val->data, length);
+}
+
+/* Copy the unsigned value held by TYPE into *VALUE.  Returns 0 on success,
+   or -1 without writing *VALUE when the element was never read.  A read
+   element must be tagged TYPE_UINT (checked with assert). */
+static int
+ne_get_uint(struct ebml_type type, uint64_t * value)
+{
+  if (type.read) {
+    assert(type.type == TYPE_UINT);
+    *value = type.v.u;
+    return 0;
+  }
+
+  return -1;
+}
+
+/* Copy the floating point value held by TYPE into *VALUE.  Returns 0 on
+   success, or -1 without writing *VALUE when the element was never read.  A
+   read element must be tagged TYPE_FLOAT (checked with assert). */
+static int
+ne_get_float(struct ebml_type type, double * value)
+{
+  if (type.read) {
+    assert(type.type == TYPE_FLOAT);
+    *value = type.v.f;
+    return 0;
+  }
+
+  return -1;
+}
+
+/* Store the string pointer held by TYPE in *VALUE; the characters are not
+   copied.  Returns 0 on success, or -1 without writing *VALUE when the
+   element was never read.  A read element must be tagged TYPE_STRING
+   (checked with assert). */
+static int
+ne_get_string(struct ebml_type type, char ** value)
+{
+  if (type.read) {
+    assert(type.type == TYPE_STRING);
+    *value = type.v.s;
+    return 0;
+  }
+
+  return -1;
+}
+
+/* Copy the binary descriptor (pointer and length, not the bytes) held by
+   TYPE into *VALUE.  Returns 0 on success, or -1 without writing *VALUE
+   when the element was never read.  A read element must be tagged
+   TYPE_BINARY (checked with assert). */
+static int
+ne_get_binary(struct ebml_type type, struct ebml_binary * value)
+{
+  if (type.read) {
+    assert(type.type == TYPE_BINARY);
+    *value = type.v.b;
+    return 0;
+  }
+
+  return -1;
+}
+
+/* Return 1 if ID appears in the descriptor table of ANCESTOR or of any
+   level below it on the stack, 0 otherwise.  An ID of 0 never matches
+   because 0 terminates every table. */
+static int
+ne_is_ancestor_element(uint64_t id, struct list_node * ancestor)
+{
+  struct list_node * level = ancestor;
+  struct ebml_element_desc * desc;
+
+  while (level != NULL) {
+    desc = level->node;
+    while (desc->id != 0) {
+      if (desc->id == id)
+        return 1;
+      desc += 1;
+    }
+    level = level->previous;
+  }
+
+  return 0;
+}
+
+/* Return the row of the zero-terminated table ELEMENTS whose id equals ID,
+   or NULL when there is none. */
+static struct ebml_element_desc *
+ne_find_element(uint64_t id, struct ebml_element_desc * elements)
+{
+  struct ebml_element_desc * desc = elements;
+
+  while (desc->id != 0 && desc->id != id)
+    desc += 1;
+
+  return desc->id != 0 ? desc : NULL;
+}
+
+/* Push a new level onto the context's ancestor stack.  ANCESTOR is the
+   descriptor table for the children of the element being entered and DATA
+   is the base address their offsets apply to.  The node comes from
+   ne_alloc and is released by ne_ctx_pop. */
+static void
+ne_ctx_push(nestegg * ctx, struct ebml_element_desc * ancestor, void * data)
+{
+  struct list_node * top = ne_alloc(sizeof(struct list_node));
+
+  top->data = data;
+  top->node = ancestor;
+  top->previous = ctx->ancestor;
+
+  ctx->ancestor = top;
+}
+
+/* Remove and free the top level of the ancestor stack.  The stack must not
+   be empty: ctx->ancestor is dereferenced without a check. */
+static void
+ne_ctx_pop(nestegg * ctx)
+{
+  struct list_node * top = ctx->ancestor;
+
+  ctx->ancestor = top->previous;
+  free(top);
+}
+
+/* Capture the current stream position, the top of the ancestor stack and
+   the cached element header in *S.  Returns 0 on success or -1 when the
+   stream position cannot be determined; in that case only
+   S->stream_offset has been written. */
+static int
+ne_ctx_save(nestegg * ctx, struct saved_state * s)
+{
+  int64_t position = ne_io_tell(ctx->io);
+
+  s->stream_offset = position;
+  if (position < 0)
+    return -1;
+
+  s->last_size = ctx->last_size;
+  s->last_id = ctx->last_id;
+  s->ancestor = ctx->ancestor;
+
+  return 0;
+}
+
+/* Seek back to the position recorded in *S and reinstate the saved
+   ancestor pointer and cached element header.  Returns 0 on success or -1,
+   leaving the context untouched, when the seek fails. */
+static int
+ne_ctx_restore(nestegg * ctx, struct saved_state * s)
+{
+  if (ne_io_seek(ctx->io, s->stream_offset, NESTEGG_SEEK_SET) != 0)
+    return -1;
+
+  ctx->last_size = s->last_size;
+  ctx->last_id = s->last_id;
+  ctx->ancestor = s->ancestor;
+
+  return 0;
+}
+
+/* Report the ID and size of the next element without consuming it for the
+   parser: the header is read from the stream once and cached in
+   ctx->last_id / ctx->last_size until ne_read_element clears it.  ID and
+   SIZE may each be NULL.  Returns 1 on success or the failing I/O status.
+
+   The header is read into locals and committed only after both fields were
+   read, so a failed or truncated read can no longer leave a new ID paired
+   with a partial size that a later call would report as a valid header.
+
+   NOTE(review): the cache counts as filled only when both fields are
+   non-zero, so a header with size 0 is not recognised as cached and the
+   next call reads a fresh header from the stream.  Fixing that needs a
+   separate "valid" flag in struct nestegg. */
+static int
+ne_peek_element(nestegg * ctx, uint64_t * id, uint64_t * size)
+{
+  int r;
+  uint64_t next_id, next_size;
+
+  if (ctx->last_id && ctx->last_size) {
+    if (id)
+      *id = ctx->last_id;
+    if (size)
+      *size = ctx->last_size;
+    return 1;
+  }
+
+  r = ne_read_id(ctx->io, &next_id, NULL);
+  if (r != 1)
+    return r;
+
+  r = ne_read_vint(ctx->io, &next_size, NULL);
+  if (r != 1)
+    return r;
+
+  ctx->last_id = next_id;
+  ctx->last_size = next_size;
+
+  if (id)
+    *id = ctx->last_id;
+  if (size)
+    *size = ctx->last_size;
+
+  return 1;
+}
+
+/* Like ne_peek_element, but also consumes the header: after a successful
+   call the cached ID and size are cleared so the following peek reads the
+   next header from the stream.  Returns 1 on success or the status from
+   ne_peek_element. */
+static int
+ne_read_element(nestegg * ctx, uint64_t * id, uint64_t * size)
+{
+  int r = ne_peek_element(ctx, id, size);
+
+  if (r == 1) {
+    ctx->last_size = 0;
+    ctx->last_id = 0;
+  }
+
+  return r;
+}
+
+/* Enter a repeatable (DESC_FLAG_MULTI) master element.  A new list node with
+   zero-filled storage of desc->size bytes is appended to the ebml_list found
+   at desc->offset inside the current level's data, and a new level using
+   desc->children and that storage is pushed onto the ancestor stack.  Node
+   and storage come from the context's allocation pool. */
+static void
+ne_read_master(nestegg * ctx, struct ebml_element_desc * desc)
+{
+  struct ebml_list * list;
+  struct ebml_list_node * node, * oldtail;
+
+  assert(desc->type == TYPE_MASTER && desc->flags & DESC_FLAG_MULTI);
+
+  /* the log callback is variadic: pass exactly the type %llx expects */
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "multi master element %llx (%s)",
+           (unsigned long long) desc->id, desc->name);
+
+  list = (struct ebml_list *) (ctx->ancestor->data + desc->offset);
+
+  node = ne_pool_alloc(sizeof(*node), ctx->alloc_pool);
+  node->id = desc->id;
+  node->data = ne_pool_alloc(desc->size, ctx->alloc_pool);
+
+  /* append at the tail; the first node also becomes the head */
+  oldtail = list->tail;
+  if (oldtail)
+    oldtail->next = node;
+  list->tail = node;
+  if (!list->head)
+    list->head = node;
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p", node->data);
+
+  ne_ctx_push(ctx, desc->children, node->data);
+}
+
+/* Enter a master element that may occur only once.  Its storage lives in
+   place at desc->offset inside the current level's data; a new level using
+   desc->children and that address is pushed onto the ancestor stack. */
+static void
+ne_read_single_master(nestegg * ctx, struct ebml_element_desc * desc)
+{
+  assert(desc->type == TYPE_MASTER && !(desc->flags & DESC_FLAG_MULTI));
+
+  /* the log callback is variadic, so every argument is cast to exactly the
+     type its conversion expects (%llx, %p, %u) */
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "single master element %llx (%s)",
+           (unsigned long long) desc->id, desc->name);
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p (%u)",
+           (void *) (ctx->ancestor->data + desc->offset),
+           (unsigned int) desc->offset);
+
+  ne_ctx_push(ctx, desc->children, ctx->ancestor->data + desc->offset);
+}
+
+/* Read the LENGTH byte payload of the simple element described by DESC into
+   its struct ebml_type slot at desc->offset inside the current level's
+   data.  The slot's read flag is set only after a successful read.
+
+   When the slot already holds a value, the duplicate's payload is consumed
+   and discarded so the stream stays positioned on the next element header;
+   the result is then that of ne_io_read_skip.  Returning without consuming
+   the payload would make the parser read payload bytes as the next header.
+
+   Returns 1 on success, -1 for an invalid LENGTH, or the failing I/O
+   status. */
+static int
+ne_read_simple(nestegg * ctx, struct ebml_element_desc * desc, size_t length)
+{
+  struct ebml_type * storage;
+  int r;
+
+  storage = (struct ebml_type *) (ctx->ancestor->data + desc->offset);
+
+  if (storage->read) {
+    ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) already read, skipping",
+             (unsigned long long) desc->id, desc->name);
+    return ne_io_read_skip(ctx->io, length);
+  }
+
+  storage->type = desc->type;
+
+  /* the log callback is variadic, so every argument is cast to exactly the
+     type its conversion expects (%llx, %p, %u) */
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) -> %p (%u)",
+           (unsigned long long) desc->id, desc->name, (void *) storage,
+           (unsigned int) desc->offset);
+
+  r = -1;
+
+  switch (desc->type) {
+  case TYPE_UINT:
+    r = ne_read_uint(ctx->io, &storage->v.u, length);
+    break;
+  case TYPE_FLOAT:
+    r = ne_read_float(ctx->io, &storage->v.f, length);
+    break;
+  case TYPE_INT:
+    r = ne_read_int(ctx->io, &storage->v.i, length);
+    break;
+  case TYPE_STRING:
+    r = ne_read_string(ctx, &storage->v.s, length);
+    break;
+  case TYPE_BINARY:
+    r = ne_read_binary(ctx, &storage->v.b, length);
+    break;
+  case TYPE_MASTER:
+  case TYPE_UNKNOWN:
+    /* masters are handled by the caller and never reach this function */
+    assert(0);
+    break;
+  }
+
+  if (r == 1)
+    storage->read = 1;
+
+  return r;
+}
+
+/* Drive the parser from the current stream position using the ancestor
+   stack in CTX.  Returns -1 immediately when the stack is empty.
+
+   For every element header peeked:
+   - an element known at the current level is either a suspend point
+     (parsing stops with 1 and the header stays cached), a master (a new
+     level is pushed) or a simple value (read through ne_read_simple);
+   - an element that belongs to a level further down the stack ends the
+     current level, which is popped -- unless the current level's table is
+     TOP_LEVEL, in which case parsing stops with 1;
+   - anything else is skipped.
+
+   Returns 1 when stopped at a suspend point or at TOP_LEVEL, otherwise the
+   status that ended the loop (I/O status or -1).  On any result other than
+   1 the whole ancestor stack is popped.  TOP_LEVEL may be NULL. */
+static int
+ne_parse(nestegg * ctx, struct ebml_element_desc * top_level)
+{
+  int r;
+  int64_t * data_offset;
+  uint64_t id, size;
+  struct ebml_element_desc * element;
+
+  /* loop until we need to return:
+     - hit suspend point
+     - parse complete
+     - error occurred */
+
+  /* loop over elements at current level reading them if sublevel found,
+     push ctx onto stack and continue if sublevel ended, pop ctx off stack
+     and continue */
+
+  if (!ctx->ancestor)
+    return -1;
+
+  for (;;) {
+    r = ne_peek_element(ctx, &id, &size);
+    if (r != 1)
+      break;
+
+    element = ne_find_element(id, ctx->ancestor->node);
+    if (element) {
+      if (element->flags & DESC_FLAG_SUSPEND) {
+        assert(element->type == TYPE_BINARY);
+        /* the log callback is variadic: pass exactly the type %llx expects */
+        ctx->log(ctx, NESTEGG_LOG_DEBUG, "suspend parse at %llx",
+                 (unsigned long long) id);
+        r = 1;
+        break;
+      }
+
+      r = ne_read_element(ctx, &id, &size);
+      if (r != 1)
+        break;
+
+      if (element->flags & DESC_FLAG_OFFSET) {
+        /* remember where this element's payload starts */
+        data_offset = (int64_t *) (ctx->ancestor->data + element->data_offset);
+        *data_offset = ne_io_tell(ctx->io);
+        if (*data_offset < 0) {
+          r = -1;
+          break;
+        }
+      }
+
+      if (element->type == TYPE_MASTER) {
+        if (element->flags & DESC_FLAG_MULTI)
+          ne_read_master(ctx, element);
+        else
+          ne_read_single_master(ctx, element);
+        continue;
+      } else {
+        r = ne_read_simple(ctx, element, size);
+        if (r < 0)
+          break;
+      }
+    } else if (ne_is_ancestor_element(id, ctx->ancestor->previous)) {
+      ctx->log(ctx, NESTEGG_LOG_DEBUG, "parent element %llx",
+               (unsigned long long) id);
+      if (top_level && ctx->ancestor->node == top_level) {
+        ctx->log(ctx, NESTEGG_LOG_DEBUG, "*** parse about to back up past top_level");
+        r = 1;
+        break;
+      }
+      ne_ctx_pop(ctx);
+    } else {
+      r = ne_read_element(ctx, &id, &size);
+      if (r != 1)
+        break;
+
+      if (id != ID_VOID && id != ID_CRC32)
+        ctx->log(ctx, NESTEGG_LOG_DEBUG, "unknown element %llx",
+                 (unsigned long long) id);
+      r = ne_io_read_skip(ctx->io, size);
+      if (r != 1)
+        break;
+    }
+  }
+
+  /* any failure invalidates the parse state: unwind the whole stack */
+  if (r != 1)
+    while (ctx->ancestor)
+      ne_ctx_pop(ctx);
+
+  return r;
+}
+
+/* Decode one Xiph-style lace value from the buffer at *np: bytes are summed
+   until a byte other than 255 ends the value.  *np is advanced past every
+   byte consumed.  No bounds checking is done here; the caller must make sure
+   the value terminates inside its buffer. */
+static uint64_t
+ne_xiph_lace_value(unsigned char ** np)
+{
+  unsigned char * cursor = *np;
+  uint64_t total = 0;
+  uint64_t byte;
+
+  do {
+    byte = *cursor++;
+    total += byte;
+  } while (byte == 255);
+
+  *np = cursor;
+
+  return total;
+}
+
+/* Read one Xiph-style lace value from io: single bytes are accumulated into
+   *value until a byte other than 255 is read.  *consumed is incremented once
+   per byte read.  Returns 1 on success; otherwise returns the result of the
+   failing ne_read_uint call, in which case *value may hold a partial sum. */
+static int
+ne_read_xiph_lace_value(nestegg_io * io, uint64_t * value, size_t * consumed)
+{
+  int status;
+  uint64_t byte;
+
+  status = ne_read_uint(io, &byte, 1);
+  if (status != 1)
+    return status;
+  *value = byte;
+  *consumed += 1;
+
+  /* a byte of 255 means the value continues in the next byte */
+  while (byte == 255) {
+    status = ne_read_uint(io, &byte, 1);
+    if (status != 1)
+      return status;
+    *value += byte;
+    *consumed += 1;
+  }
+
+  return 1;
+}
+
+/* Read the n - 1 explicit Xiph lace sizes of a block that holds n frames and
+   derive the size of the final frame from what is left of the block.  block
+   is the total block size; *read is the number of block bytes consumed so far
+   and is updated as lace bytes are read.  Returns 1 on success, -1 when the
+   sizes do not fit in the block, or the result of a failing read. */
+static int
+ne_read_xiph_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, uint64_t * sizes)
+{
+  int status;
+  size_t idx = 0;
+  uint64_t remaining;
+  uint64_t laced = 0;
+
+  for (remaining = n - 1; remaining != 0; --remaining) {
+    status = ne_read_xiph_lace_value(io, &sizes[idx], read);
+    if (status != 1)
+      return status;
+    laced += sizes[idx];
+    idx += 1;
+  }
+
+  if (*read + laced > block)
+    return -1;
+
+  /* whatever is left of the block belongs to the last frame */
+  sizes[idx] = block - *read - laced;
+  return 1;
+}
+
+/* Read the EBML lace sizes of a block that holds n frames.  The first size is
+   read with ne_read_vint, each following explicit size is the previous size
+   plus a signed delta read with ne_read_svint, and the final frame takes the
+   remainder of the block.  block is the total block size; *read is the number
+   of block bytes consumed so far and is updated as sizes are read.  Returns 1
+   on success, -1 when the sizes do not fit in the block, or the result of a
+   failing read. */
+static int
+ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, uint64_t * sizes)
+{
+  int status;
+  uint64_t first, vint_len, laced, remaining;
+  int64_t delta;
+  size_t idx;
+
+  status = ne_read_vint(io, &first, &vint_len);
+  if (status != 1)
+    return status;
+  *read += vint_len;
+
+  sizes[0] = first;
+  laced = sizes[0];
+
+  /* the first size is already stored and the last one is implicit, which
+     leaves n - 2 delta-coded sizes to read */
+  idx = 1;
+  for (remaining = n - 2; remaining != 0; --remaining) {
+    status = ne_read_svint(io, &delta, &vint_len);
+    if (status != 1)
+      return status;
+    *read += vint_len;
+    sizes[idx] = sizes[idx - 1] + delta;
+    laced += sizes[idx];
+    idx += 1;
+  }
+
+  if (*read + laced > block)
+    return -1;
+
+  /* whatever is left of the block belongs to the last frame */
+  sizes[idx] = block - *read - laced;
+  return 1;
+}
+
+/* Return the segment's timecode scale, falling back to 1000000 when
+   ne_get_uint reports that the value is not available. */
+static uint64_t
+ne_get_timecode_scale(nestegg * ctx)
+{
+  uint64_t parsed;
+
+  if (ne_get_uint(ctx->segment.info.timecode_scale, &parsed) == 0)
+    return parsed;
+
+  return 1000000;
+}
+
+/* Return the track entry at zero-based position track in the segment's track
+   list, or NULL when the list has fewer entries than that. */
+static struct track_entry *
+ne_find_track_entry(nestegg * ctx, unsigned int track)
+{
+  struct ebml_list_node * cur;
+  unsigned int pos = 0;
+
+  for (cur = ctx->segment.tracks.track_entry.head; cur; cur = cur->next) {
+    assert(cur->id == ID_TRACK_ENTRY);
+    if (pos == track)
+      return cur->data;
+    pos += 1;
+  }
+
+  return NULL;
+}
+
+/* Read the payload of a Block or SimpleBlock element whose header has already
+   been consumed, and return it as a newly allocated packet in *data.
+
+   block_id is the element ID (only used to label the debug log line) and
+   block_size the element's payload size.  The block header (track number,
+   relative timecode, flags and optional lacing information) is parsed, the
+   per-frame sizes are validated against block_size, and every frame is read
+   into its own buffer linked from the packet.
+
+   Returns 1 on success with *data set, in which case the caller releases the
+   packet with nestegg_free_packet.  On failure *data is NULL and the return
+   value is -1 for malformed data or the result of the failing read. */
+static int
+ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_packet ** data)
+{
+  int r;
+  int64_t timecode, abs_timecode;
+  nestegg_packet * pkt;
+  struct cluster * cluster;
+  struct frame * f, * last;
+  struct track_entry * entry;
+  double track_scale;
+  uint64_t track, length, frame_sizes[256], cluster_tc, flags, frames, tc_scale, total;
+  unsigned int i, lacing;
+  size_t consumed = 0;
+
+  *data = NULL;
+
+  if (block_size > LIMIT_BLOCK)
+    return -1;
+
+  r = ne_read_vint(ctx->io, &track, &length);
+  if (r != 1)
+    return r;
+
+  /* block track numbers are 1-based */
+  if (track == 0 || track > ctx->track_count)
+    return -1;
+
+  consumed += length;
+
+  r = ne_read_int(ctx->io, &timecode, 2);
+  if (r != 1)
+    return r;
+
+  consumed += 2;
+
+  r = ne_read_uint(ctx->io, &flags, 1);
+  if (r != 1)
+    return r;
+
+  consumed += 1;
+
+  frames = 0;
+
+  /* flags are different between block and simpleblock, but lacing is
+     encoded the same way */
+  lacing = (flags & BLOCK_FLAGS_LACING) >> 1;
+
+  switch (lacing) {
+  case LACING_NONE:
+    frames = 1;
+    break;
+  case LACING_XIPH:
+  case LACING_FIXED:
+  case LACING_EBML:
+    /* the stored value is the frame count minus one */
+    r = ne_read_uint(ctx->io, &frames, 1);
+    if (r != 1)
+      return r;
+    consumed += 1;
+    frames += 1;
+  }
+
+  if (frames > 256)
+    return -1;
+
+  /* the header read so far must fit inside the declared block size,
+     otherwise the unsigned subtractions below would wrap around */
+  if (consumed > block_size)
+    return -1;
+
+  switch (lacing) {
+  case LACING_NONE:
+    frame_sizes[0] = block_size - consumed;
+    break;
+  case LACING_XIPH:
+    if (frames == 1)
+      return -1;
+    r = ne_read_xiph_lacing(ctx->io, block_size, &consumed, frames, frame_sizes);
+    if (r != 1)
+      return r;
+    break;
+  case LACING_FIXED:
+    if ((block_size - consumed) % frames)
+      return -1;
+    for (i = 0; i < frames; ++i)
+      frame_sizes[i] = (block_size - consumed) / frames;
+    break;
+  case LACING_EBML:
+    if (frames == 1)
+      return -1;
+    r = ne_read_ebml_lacing(ctx->io, block_size, &consumed, frames, frame_sizes);
+    if (r != 1)
+      return r;
+    break;
+  }
+
+  /* sanity check unlaced frame sizes against total block size. */
+  total = consumed;
+  for (i = 0; i < frames; ++i)
+    total += frame_sizes[i];
+  if (total > block_size)
+    return -1;
+
+  entry = ne_find_track_entry(ctx, track - 1);
+  if (!entry)
+    return -1;
+
+  track_scale = 1.0;
+
+  tc_scale = ne_get_timecode_scale(ctx);
+
+  /* the block timecode is relative to the most recently parsed cluster;
+     without one there is nothing to resolve it against */
+  if (!ctx->segment.cluster.tail)
+    return -1;
+  assert(ctx->segment.cluster.tail->id == ID_CLUSTER);
+  cluster = ctx->segment.cluster.tail->data;
+  if (ne_get_uint(cluster->timecode, &cluster_tc) != 0)
+    return -1;
+
+  abs_timecode = timecode + cluster_tc;
+  if (abs_timecode < 0)
+    return -1;
+
+  pkt = ne_alloc(sizeof(*pkt));
+  pkt->track = track - 1;
+  pkt->timecode = abs_timecode * tc_scale * track_scale;
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu",
+           block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames);
+
+  /* read each frame into its own buffer and append it to the packet */
+  last = NULL;
+  for (i = 0; i < frames; ++i) {
+    if (frame_sizes[i] > LIMIT_FRAME) {
+      nestegg_free_packet(pkt);
+      return -1;
+    }
+    f = ne_alloc(sizeof(*f));
+    f->data = ne_alloc(frame_sizes[i]);
+    f->length = frame_sizes[i];
+    r = ne_io_read(ctx->io, f->data, frame_sizes[i]);
+    if (r != 1) {
+      /* f is not linked into pkt yet, so release it separately */
+      free(f->data);
+      free(f);
+      nestegg_free_packet(pkt);
+      return -1;
+    }
+
+    if (!last)
+      pkt->frame = f;
+    else
+      last->next = f;
+    last = f;
+  }
+
+  *data = pkt;
+
+  return 1;
+}
+
+/* Assemble the first length bytes at p into an integer, most significant
+   byte first.  A length of zero yields 0. */
+static uint64_t
+ne_buf_read_id(unsigned char const * p, size_t length)
+{
+  uint64_t result = 0;
+  size_t i;
+
+  for (i = 0; i < length; ++i)
+    result = (result << 8) | p[i];
+
+  return result;
+}
+
+/* Search every seek head in the list starting at seek_head for a seek entry
+   whose binary ID decodes to id.  Returns the first matching entry, or NULL
+   when none matches. */
+static struct seek *
+ne_find_seek_for_id(struct ebml_list_node * seek_head, uint64_t id)
+{
+  struct ebml_list * list;
+  struct ebml_list_node * node;
+  struct ebml_binary raw_id;
+  struct seek * entry;
+
+  for (; seek_head; seek_head = seek_head->next) {
+    assert(seek_head->id == ID_SEEK_HEAD);
+    list = seek_head->data;
+
+    for (node = list->head; node; node = node->next) {
+      assert(node->id == ID_SEEK);
+      entry = node->data;
+
+      /* entries without a readable ID cannot match */
+      if (ne_get_binary(entry->id, &raw_id) != 0)
+        continue;
+
+      if (ne_buf_read_id(raw_id.data, raw_id.length) == id)
+        return entry;
+    }
+  }
+
+  return NULL;
+}
+
+/* Walk the cue point list and return the last cue point visited before one
+   whose time * scale exceeds tstamp.  If the very first cue point is already
+   past tstamp it is returned itself; if no cue point is past tstamp the last
+   one in the list is returned.  Returns NULL only for an empty list. */
+static struct cue_point *
+ne_find_cue_point_for_tstamp(struct ebml_list_node * cue_point, uint64_t scale, uint64_t tstamp)
+{
+  uint64_t cue_time;
+  struct cue_point * cur;
+  struct cue_point * best = NULL;
+
+  for (; cue_point; cue_point = cue_point->next) {
+    assert(cue_point->id == ID_CUE_POINT);
+    cur = cue_point->data;
+
+    /* make sure there is a result even when the first cue is too late */
+    if (!best)
+      best = cur;
+
+    if (ne_get_uint(cur->time, &cue_time) == 0 && cue_time * scale > tstamp)
+      break;
+
+    best = cur;
+  }
+
+  return best;
+}
+
+/* Return 1 when id is one of the element IDs the parser suspends on (a
+   simple block or a block), 0 otherwise. */
+static int
+ne_is_suspend_element(uint64_t id)
+{
+  /* this could search the tree of elements for DESC_FLAG_SUSPEND */
+  return id == ID_SIMPLE_BLOCK || id == ID_BLOCK;
+}
+
+/* Log callback that discards every message; installed by nestegg_init when
+   the caller passes no callback. */
+static void
+ne_null_log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...)
+{
+  /* reference the parameters so they do not count as unused */
+  (void) ctx;
+  (void) severity;
+  (void) fmt;
+}
+
+/* Create a parser context that reads through io and parse the stream until
+   the parser first suspends.  callback receives log messages; when it is
+   NULL a callback that discards them is used.
+
+   The stream must start with an EBML element, use EBML read version 1, have
+   doctype "webm", a doctype read version of 1 or 2 and at least one track
+   entry.  Returns 0 with *context set on success.  On failure returns -1,
+   destroys any partially built context and leaves *context untouched. */
+int
+nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback)
+{
+  int r;
+  uint64_t id, version, docversion;
+  struct ebml_list_node * track;
+  char * doctype;
+  nestegg * ctx = NULL;
+
+  if (!io.read || !io.seek || !io.tell)
+    return -1;
+
+  ctx = ne_alloc(sizeof(*ctx));
+
+  ctx->io = ne_alloc(sizeof(*ctx->io));
+  *ctx->io = io;
+  ctx->alloc_pool = ne_pool_init();
+  ctx->log = callback ? callback : ne_null_log_callback;
+
+  /* the stream has to open with the EBML header element */
+  r = ne_peek_element(ctx, &id, NULL);
+  if (r != 1 || id != ID_EBML)
+    goto fail;
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "ctx %p", ctx);
+
+  ne_ctx_push(ctx, ne_top_level_elements, ctx);
+
+  r = ne_parse(ctx, NULL);
+  if (r != 1)
+    goto fail;
+
+  if (ne_get_uint(ctx->ebml.ebml_read_version, &version) != 0)
+    version = 1;
+  if (version != 1)
+    goto fail;
+
+  if (ne_get_string(ctx->ebml.doctype, &doctype) != 0)
+    doctype = "matroska";
+  if (strcmp(doctype, "webm") != 0)
+    goto fail;
+
+  if (ne_get_uint(ctx->ebml.doctype_read_version, &docversion) != 0)
+    docversion = 1;
+  if (docversion < 1 || docversion > 2)
+    goto fail;
+
+  track = ctx->segment.tracks.track_entry.head;
+  if (!track)
+    goto fail;
+
+  /* count the track entries once so later lookups can range-check */
+  ctx->track_count = 0;
+  for (; track; track = track->next)
+    ctx->track_count += 1;
+
+  *context = ctx;
+
+  return 0;
+
+fail:
+  nestegg_destroy(ctx);
+  return -1;
+}
+
+/* Release a context created by nestegg_init: pops every remaining ancestor
+   off the parser stack, destroys the allocation pool and frees the I/O
+   wrapper and the context itself.  ctx must not be NULL (it is dereferenced
+   unconditionally) and must not be used afterwards. */
+void
+nestegg_destroy(nestegg * ctx)
+{
+  while (ctx->ancestor)
+    ne_ctx_pop(ctx);
+  ne_pool_destroy(ctx->alloc_pool);
+  free(ctx->io);
+  free(ctx);
+}
+
+/* Store the segment duration multiplied by the timecode scale in *duration.
+   Returns 0 on success, or -1 (leaving *duration untouched) when the segment
+   duration cannot be read. */
+int
+nestegg_duration(nestegg * ctx, uint64_t * duration)
+{
+  double raw;
+
+  if (ne_get_float(ctx->segment.info.duration, &raw) != 0)
+    return -1;
+
+  *duration = (uint64_t) (raw * ne_get_timecode_scale(ctx));
+  return 0;
+}
+
+/* Store the value returned by ne_get_timecode_scale for ctx in *scale.
+   Always returns 0. */
+int
+nestegg_tstamp_scale(nestegg * ctx, uint64_t * scale)
+{
+  *scale = ne_get_timecode_scale(ctx);
+  return 0;
+}
+
+/* Store the number of tracks counted by nestegg_init in *tracks.
+   Always returns 0. */
+int
+nestegg_track_count(nestegg * ctx, unsigned int * tracks)
+{
+  *tracks = ctx->track_count;
+  return 0;
+}
+
+/* Seek so that the next packet read comes from the cluster that the cue
+   point chosen for tstamp references for the given zero-based track.
+
+   If no cues have been parsed yet, the Cues element is located through the
+   seek head and parsed first; the parser state and stream position are saved
+   before that detour and restored afterwards, including when the detour
+   fails.  The parser is then repositioned at the cluster and run until it
+   suspends on a block.
+
+   Returns 0 on success and -1 on failure.  When the cue point has no
+   position entry for track, seek_pos stays 0 and the seek goes to the start
+   of the segment. */
+int
+nestegg_track_seek(nestegg * ctx, unsigned int track, uint64_t tstamp)
+{
+  int r;
+  struct cue_point * cue_point;
+  struct cue_track_positions * pos;
+  struct saved_state state;
+  struct seek * found;
+  uint64_t seek_pos, tc_scale, t, id;
+  struct ebml_list_node * node = ctx->segment.cues.cue_point.head;
+
+  /* If there are no cues loaded, check for cues element in the seek head
+     and load it. */
+  if (!node) {
+    found = ne_find_seek_for_id(ctx->segment.seek_head.head, ID_CUES);
+    if (!found)
+      return -1;
+
+    if (ne_get_uint(found->position, &seek_pos) != 0)
+      return -1;
+
+    /* Save old parser state. */
+    r = ne_ctx_save(ctx, &state);
+    if (r != 0)
+      return -1;
+
+    /* Seek and set up parser state for segment-level element (Cues).  From
+       here on every failure path has to put the saved state back, otherwise
+       the caller is left with a moved stream and a cleared last_id. */
+    r = ne_io_seek(ctx->io, ctx->segment_offset + seek_pos, NESTEGG_SEEK_SET);
+    if (r != 0) {
+      ne_ctx_restore(ctx, &state);
+      return -1;
+    }
+    ctx->last_id = 0;
+    ctx->last_size = 0;
+
+    r = ne_read_element(ctx, &id, NULL);
+    if (r != 1 || id != ID_CUES) {
+      ne_ctx_restore(ctx, &state);
+      return -1;
+    }
+
+    ctx->ancestor = NULL;
+    ne_ctx_push(ctx, ne_top_level_elements, ctx);
+    ne_ctx_push(ctx, ne_segment_elements, &ctx->segment);
+    ne_ctx_push(ctx, ne_cues_elements, &ctx->segment.cues);
+    /* parser will run until end of cues element. */
+    ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cue elements");
+    r = ne_parse(ctx, ne_cues_elements);
+    while (ctx->ancestor)
+      ne_ctx_pop(ctx);
+
+    /* Reset parser state to original state and seek back to old position. */
+    if (ne_ctx_restore(ctx, &state) != 0)
+      return -1;
+
+    if (r < 0)
+      return -1;
+  }
+
+  tc_scale = ne_get_timecode_scale(ctx);
+
+  cue_point = ne_find_cue_point_for_tstamp(ctx->segment.cues.cue_point.head, tc_scale, tstamp);
+  if (!cue_point)
+    return -1;
+
+  node = cue_point->cue_track_positions.head;
+
+  seek_pos = 0;
+
+  /* cue track numbers are 1-based, the track argument is 0-based */
+  while (node) {
+    assert(node->id == ID_CUE_TRACK_POSITIONS);
+    pos = node->data;
+    if (ne_get_uint(pos->track, &t) == 0 && t - 1 == track) {
+      if (ne_get_uint(pos->cluster_position, &seek_pos) != 0)
+        return -1;
+      break;
+    }
+    node = node->next;
+  }
+
+  /* Seek and set up parser state for segment-level element (Cluster). */
+  r = ne_io_seek(ctx->io, ctx->segment_offset + seek_pos, NESTEGG_SEEK_SET);
+  if (r != 0)
+    return -1;
+  ctx->last_id = 0;
+  ctx->last_size = 0;
+
+  while (ctx->ancestor)
+    ne_ctx_pop(ctx);
+
+  ne_ctx_push(ctx, ne_top_level_elements, ctx);
+  ne_ctx_push(ctx, ne_segment_elements, &ctx->segment);
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cluster elements");
+  r = ne_parse(ctx, NULL);
+  if (r != 1)
+    return -1;
+
+  /* a successful seek leaves the parser suspended on a block */
+  if (!ne_is_suspend_element(ctx->last_id))
+    return -1;
+
+  return 0;
+}
+
+/* Return NESTEGG_TRACK_VIDEO or NESTEGG_TRACK_AUDIO for the zero-based
+   track, or -1 when the track does not exist, its type cannot be read, or
+   the type has neither the video nor the audio bit set.  Video is tested
+   first. */
+int
+nestegg_track_type(nestegg * ctx, unsigned int track)
+{
+  struct track_entry * entry;
+  uint64_t kind;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry || ne_get_uint(entry->type, &kind) != 0)
+    return -1;
+
+  if (kind & TRACK_TYPE_VIDEO)
+    return NESTEGG_TRACK_VIDEO;
+  if (kind & TRACK_TYPE_AUDIO)
+    return NESTEGG_TRACK_AUDIO;
+
+  return -1;
+}
+
+/* Return NESTEGG_CODEC_VP8 or NESTEGG_CODEC_VORBIS for the zero-based track,
+   or -1 when the track does not exist, its codec ID string cannot be read,
+   or the string matches neither TRACK_ID_VP8 nor TRACK_ID_VORBIS. */
+int
+nestegg_track_codec_id(nestegg * ctx, unsigned int track)
+{
+  char * name;
+  struct track_entry * entry;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry || ne_get_string(entry->codec_id, &name) != 0)
+    return -1;
+
+  if (strcmp(name, TRACK_ID_VP8) == 0)
+    return NESTEGG_CODEC_VP8;
+  if (strcmp(name, TRACK_ID_VORBIS) == 0)
+    return NESTEGG_CODEC_VORBIS;
+
+  return -1;
+}
+
+/* Store in *count the number of codec data items held in the codec private
+   data of the zero-based track: the first byte of that data plus one.  Only
+   Vorbis tracks are supported.
+
+   Returns 0 on success.  Returns -1 when the track does not exist, is not
+   Vorbis, has no readable or non-empty codec private data, or the computed
+   count exceeds 3.  *count is 0 on the early failures, but holds the
+   rejected value when the count is too large. */
+int
+nestegg_track_codec_data_count(nestegg * ctx, unsigned int track,
+                               unsigned int * count)
+{
+  struct track_entry * entry;
+  struct ebml_binary priv;
+  unsigned char * lead;
+
+  *count = 0;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry || nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS)
+    return -1;
+
+  if (ne_get_binary(entry->codec_private, &priv) != 0 || priv.length < 1)
+    return -1;
+
+  lead = priv.data;
+  *count = *lead + 1;
+
+  return *count > 3 ? -1 : 0;
+}
+
+/* Locate codec data item number item (zero-based) inside the codec private
+   data of the zero-based track.  Only Vorbis tracks are supported.
+
+   The private data starts with a byte holding the item count minus one,
+   followed by a Xiph lace value for every item but the last, followed by the
+   items themselves; the last item takes the remaining bytes.
+
+   On success returns 0 with *data pointing into the track's private data
+   (not a copy) and *length set to the item's size.  Returns -1, with *data
+   NULL and *length 0, when the track does not exist or is not Vorbis, when
+   the private data is missing, empty or truncated, when item is not below
+   the item count, or when a preceding item is larger than LIMIT_FRAME. */
+int
+nestegg_track_codec_data(nestegg * ctx, unsigned int track, unsigned int item,
+                         unsigned char ** data, size_t * length)
+{
+  struct track_entry * entry;
+  struct ebml_binary codec_private;
+  uint64_t sizes[3], total, header;
+  unsigned char * p, * end, * scan;
+  unsigned int count, i;
+
+  *data = NULL;
+  *length = 0;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry)
+    return -1;
+
+  if (nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS)
+    return -1;
+
+  if (ne_get_binary(entry->codec_private, &codec_private) != 0)
+    return -1;
+
+  /* the count byte must exist before it is read */
+  if (codec_private.length < 1)
+    return -1;
+
+  p = codec_private.data;
+  end = p + codec_private.length;
+  count = *p++ + 1;
+
+  /* sizes[] has room for three items; item must name one of them */
+  if (count > 3 || item >= count)
+    return -1;
+
+  total = 0;
+  for (i = 0; i + 1 < count; ++i) {
+    /* ne_xiph_lace_value does no bounds checking, so first make sure the
+       lace value ends (a byte other than 255) inside the buffer */
+    for (scan = p; scan < end && *scan == 255; ++scan)
+      ;
+    if (scan == end)
+      return -1;
+    sizes[i] = ne_xiph_lace_value(&p);
+    total += sizes[i];
+  }
+
+  /* the explicit sizes must fit in what follows the lace header, otherwise
+     the size of the last item would wrap around */
+  header = p - codec_private.data;
+  if (total > codec_private.length - header)
+    return -1;
+  sizes[i] = codec_private.length - header - total;
+
+  /* skip over the items that precede the requested one */
+  for (i = 0; i < item; ++i) {
+    if (sizes[i] > LIMIT_FRAME)
+      return -1;
+    p += sizes[i];
+  }
+  *data = p;
+  *length = sizes[item];
+
+  return 0;
+}
+
+/* Fill *params with the video parameters of the zero-based track.  *params
+   is zeroed first.  Pixel width and height are required; the four crop
+   values are preset to 0 and the display size to the pixel size before each
+   optional field is read.
+
+   Returns 0 on success, or -1 when the track does not exist, is not a video
+   track, or lacks a readable pixel width or height. */
+int
+nestegg_track_video_params(nestegg * ctx, unsigned int track,
+                           nestegg_video_params * params)
+{
+  struct track_entry * entry;
+  uint64_t field;
+
+  memset(params, 0, sizeof(*params));
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry || nestegg_track_type(ctx, track) != NESTEGG_TRACK_VIDEO)
+    return -1;
+
+  /* required fields */
+  if (ne_get_uint(entry->video.pixel_width, &field) != 0)
+    return -1;
+  params->width = field;
+
+  if (ne_get_uint(entry->video.pixel_height, &field) != 0)
+    return -1;
+  params->height = field;
+
+  /* optional fields: field is preset before each read whose result is
+     ignored */
+  field = 0;
+  ne_get_uint(entry->video.pixel_crop_bottom, &field);
+  params->crop_bottom = field;
+
+  field = 0;
+  ne_get_uint(entry->video.pixel_crop_top, &field);
+  params->crop_top = field;
+
+  field = 0;
+  ne_get_uint(entry->video.pixel_crop_left, &field);
+  params->crop_left = field;
+
+  field = 0;
+  ne_get_uint(entry->video.pixel_crop_right, &field);
+  params->crop_right = field;
+
+  field = params->width;
+  ne_get_uint(entry->video.display_width, &field);
+  params->display_width = field;
+
+  field = params->height;
+  ne_get_uint(entry->video.display_height, &field);
+  params->display_height = field;
+
+  return 0;
+}
+
+/* Fill *params with the audio parameters of the zero-based track.  *params
+   is zeroed first.  Rate, channels and depth are preset to 8000, 1 and 16
+   before the corresponding optional field is read.
+
+   Returns 0 on success, or -1 when the track does not exist or is not an
+   audio track. */
+int
+nestegg_track_audio_params(nestegg * ctx, unsigned int track,
+                           nestegg_audio_params * params)
+{
+  struct track_entry * entry;
+  uint64_t field;
+
+  memset(params, 0, sizeof(*params));
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry || nestegg_track_type(ctx, track) != NESTEGG_TRACK_AUDIO)
+    return -1;
+
+  params->rate = 8000;
+  ne_get_float(entry->audio.sampling_frequency, &params->rate);
+
+  field = 1;
+  ne_get_uint(entry->audio.channels, &field);
+  params->channels = field;
+
+  field = 16;
+  ne_get_uint(entry->audio.bit_depth, &field);
+  params->depth = field;
+
+  return 0;
+}
+
+/* Read the next packet from the stream into *pkt.  Elements that are not
+   blocks are handed to the parser until the next element is a block or
+   simple block, which is then read with ne_read_block.
+
+   Returns the result of ne_read_block for the block found, or the first
+   result other than 1 from peeking, reading or parsing an element.  *pkt is
+   NULL unless a packet was produced. */
+int
+nestegg_read_packet(nestegg * ctx, nestegg_packet ** pkt)
+{
+  int r;
+  uint64_t id, size;
+
+  *pkt = NULL;
+
+  for (;;) {
+    r = ne_peek_element(ctx, &id, &size);
+    if (r != 1)
+      return r;
+
+    if (!ne_is_suspend_element(id)) {
+      /* not a block: let the parser consume elements until it suspends */
+      r = ne_parse(ctx, NULL);
+      if (r != 1)
+        return r;
+      continue;
+    }
+
+    /* any suspend fields must be handled here; the only suspend fields are
+       blocks and simple blocks, which we handle directly. */
+    r = ne_read_element(ctx, &id, &size);
+    if (r != 1)
+      return r;
+
+    return ne_read_block(ctx, id, size, pkt);
+  }
+}
+
+/* Free a packet: every frame in its list (data buffer and node) and then
+   the packet itself.  pkt must not be NULL. */
+void
+nestegg_free_packet(nestegg_packet * pkt)
+{
+  struct frame * cur;
+  struct frame * following;
+
+  for (cur = pkt->frame; cur; cur = following) {
+    /* fetch the link before the node is freed */
+    following = cur->next;
+    free(cur->data);
+    free(cur);
+  }
+
+  free(pkt);
+}
+
+/* Store the packet's track number in *track.  ne_read_block stores it as
+   the block's track number minus one, i.e. zero-based.  Always returns 0. */
+int
+nestegg_packet_track(nestegg_packet * pkt, unsigned int * track)
+{
+  *track = pkt->track;
+  return 0;
+}
+
+/* Store the packet's timecode in *tstamp.  ne_read_block computes it as the
+   absolute block timecode multiplied by the timecode scale.  Always
+   returns 0. */
+int
+nestegg_packet_tstamp(nestegg_packet * pkt, uint64_t * tstamp)
+{
+  *tstamp = pkt->timecode;
+  return 0;
+}
+
+/* Store the number of frames linked from the packet in *count.  Always
+   returns 0. */
+int
+nestegg_packet_count(nestegg_packet * pkt, unsigned int * count)
+{
+  struct frame * cur;
+
+  *count = 0;
+
+  for (cur = pkt->frame; cur; cur = cur->next)
+    *count += 1;
+
+  return 0;
+}
+
+/* Look up frame number item (zero-based) of the packet.  On success returns
+   0 with *data pointing at the frame's buffer (owned by the packet) and
+   *length set to its size.  Returns -1 with *data NULL and *length 0 when
+   the packet has no such frame. */
+int
+nestegg_packet_data(nestegg_packet * pkt, unsigned int item,
+                    unsigned char ** data, size_t * length)
+{
+  struct frame * cur;
+  unsigned int pos = 0;
+
+  *data = NULL;
+  *length = 0;
+
+  for (cur = pkt->frame; cur; cur = cur->next) {
+    if (pos == item) {
+      *data = cur->data;
+      *length = cur->length;
+      return 0;
+    }
+    pos += 1;
+  }
+
+  return -1;
+}
diff --git a/nestegg/test/test.c b/nestegg/test/test.c

new file mode 100644 (file)

index 0000000..210b640
--- /dev/null
+++ b/nestegg/test/test.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include "nestegg/nestegg.h"
+
+#undef DEBUG
+#define SEEK_TEST
+
+/* nestegg_io read callback backed by stdio.  Reads exactly length bytes from
+   the FILE passed as fp into p.  Returns 1 when all bytes were read, 0 when
+   the read came up short because end of file was reached, and -1 on any
+   other failure.  A zero-length request succeeds without touching the
+   stream. */
+static int
+stdio_read(void * p, size_t length, void * fp)
+{
+  size_t r;
+
+  /* fread returns 0 items for a zero-sized element, which would otherwise
+     be mistaken for an error below */
+  if (length == 0)
+    return 1;
+
+  r = fread(p, length, 1, fp);
+  if (r == 0 && feof(fp))
+    return 0;
+  return r == 0 ? -1 : 1;
+}
+
+/* nestegg_io seek callback backed by stdio: forwards to fseek on the FILE
+   passed as fp and returns its result.  whence is passed through unchanged.
+   NOTE(review): fseek takes a long offset, so an int64_t offset is narrowed
+   where long is 32 bits -- confirm whether large files matter here. */
+static int
+stdio_seek(int64_t offset, int whence, void * fp)
+{
+  return fseek(fp, offset, whence);
+}
+
+/* nestegg_io tell callback backed by stdio: returns ftell's result for the
+   FILE passed as fp, widened to int64_t (ftell reports failure as -1). */
+static int64_t
+stdio_tell(void * fp)
+{
+  return ftell(fp);
+}
+
+/* Log callback for the test program: writes the context pointer, a severity
+   label and the formatted message to stderr, followed by a newline.  Unless
+   DEBUG is defined, messages below NESTEGG_LOG_WARNING are dropped. */
+static void
+log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...)
+{
+  va_list args;
+  char const * label;
+
+#ifndef DEBUG
+  if (severity < NESTEGG_LOG_WARNING)
+    return;
+#endif
+
+  if (severity == NESTEGG_LOG_DEBUG)
+    label = "debug:   ";
+  else if (severity == NESTEGG_LOG_WARNING)
+    label = "warning: ";
+  else if (severity == NESTEGG_LOG_CRITICAL)
+    label = "critical:";
+  else
+    label = "unknown: ";
+
+  fprintf(stderr, "%p %s ", (void *) ctx, label);
+
+  va_start(args, fmt);
+  vfprintf(stderr, fmt, args);
+  va_end(args);
+
+  fprintf(stderr, "\n");
+}
+
+int
+main(int argc, char * argv[])
+{
+  FILE * fp;
+  int r, type;
+  nestegg * ctx;
+  nestegg_audio_params aparams;
+  nestegg_packet * pkt;
+  nestegg_video_params vparams;
+  size_t length, size;
+  uint64_t duration, tstamp, pkt_tstamp;
+  unsigned char * codec_data, * ptr;
+  unsigned int cnt, i, j, track, tracks, pkt_cnt, pkt_track;
+  unsigned int data_items = 0;
+  nestegg_io io = {
+    stdio_read,
+    stdio_seek,
+    stdio_tell,
+    NULL
+  };
+
+  if (argc != 2)
+    return EXIT_FAILURE;
+
+  fp = fopen(argv[1], "rb");
+  if (!fp)
+    return EXIT_FAILURE;
+
+  io.userdata = fp;
+
+  ctx = NULL;
+  r = nestegg_init(&ctx, io, log_callback);
+  if (r != 0)
+    return EXIT_FAILURE;
+
+  nestegg_track_count(ctx, &tracks);
+  nestegg_duration(ctx, &duration);
+#ifdef DEBUG
+  fprintf(stderr, "media has %u tracks and duration %fs\n", tracks, duration / 1e9);
+#endif
+
+  for (i = 0; i < tracks; ++i) {
+    type = nestegg_track_type(ctx, i);
+#ifdef DEBUG
+    fprintf(stderr, "track %u: type: %d codec: %d", i,
+            type, nestegg_track_codec_id(ctx, i));
+#endif
+    nestegg_track_codec_data_count(ctx, i, &data_items);
+    for (j = 0; j < data_items; ++j) {
+      nestegg_track_codec_data(ctx, i, j, &codec_data, &length);
+#ifdef DEBUG
+      fprintf(stderr, " (%p, %u)", codec_data, (unsigned int) length);
+#endif
+    }
+    if (type == NESTEGG_TRACK_VIDEO) {
+      nestegg_track_video_params(ctx, i, &vparams);
+#ifdef DEBUG
+      fprintf(stderr, " video: %ux%u (d: %ux%u %ux%ux%ux%u)",
+              vparams.width, vparams.height,
+              vparams.display_width, vparams.display_height,
+              vparams.crop_top, vparams.crop_left, vparams.crop_bottom, vparams.crop_right);
+#endif
+    } else if (type == NESTEGG_TRACK_AUDIO) {
+      nestegg_track_audio_params(ctx, i, &aparams);
+#ifdef DEBUG
+      fprintf(stderr, " audio: %.2fhz %u bit %u channels",
+              aparams.rate, aparams.depth, aparams.channels);
+#endif
+    }
+#ifdef DEBUG
+    fprintf(stderr, "\n");
+#endif
+  }
+
+#ifdef SEEK_TEST
+#ifdef DEBUG
+  fprintf(stderr, "seek to middle\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration / 2);
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "middle ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "middle seek failed\n");
+#endif
+    }
+  }
+
+#ifdef DEBUG
+  fprintf(stderr, "seek to ~end\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration - (duration / 10));
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "end ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "end seek failed\n");
+#endif
+    }
+  }
+
+#ifdef DEBUG
+  fprintf(stderr, "seek to ~start\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration / 10);
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "start ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "start seek failed\n");
+#endif
+    }
+  }
+#endif
+
+  while (nestegg_read_packet(ctx, &pkt) > 0) {
+    nestegg_packet_track(pkt, &pkt_track);
+    nestegg_packet_count(pkt, &pkt_cnt);
+    nestegg_packet_tstamp(pkt, &pkt_tstamp);
+
+#ifdef DEBUG
+    fprintf(stderr, "t %u pts %f frames %u: ", pkt_track, pkt_tstamp / 1e9, pkt_cnt);
+#endif
+
+    for (i = 0; i < pkt_cnt; ++i) {
+      nestegg_packet_data(pkt, i, &ptr, &size);
+#ifdef DEBUG
+      fprintf(stderr, "%u ", (unsigned int) size);
+#endif
+    }
+#ifdef DEBUG
+    fprintf(stderr, "\n");
+#endif
+
+    nestegg_free_packet(pkt);
+  }
+
+  nestegg_destroy(ctx);
+  fclose(fp);
+
+  return EXIT_SUCCESS;
+}
diff --git a/release.sh b/release.sh

deleted file mode 100755 (executable)

index 800bdf8..0000000
--- a/release.sh
+++ /dev/null
@@ -1,210 +0,0 @@
-#!/bin/sh
-##
-##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-
-
-
-self=$0
-
-for opt; do
-    case $opt in
-        --clean) clean=yes;;
-        -j*) jopt=$opt;;
-        *) echo "Unsupported option $opt"; exit 1;;
-    esac
-done
-
-TAB="$(printf '\t')"
-cat > release.mk << EOF
-%\$(BUILD_SFX).tar.bz2: %/.done
-${TAB}@echo "\$(subst .tar.bz2,,\$@): tarball"
-${TAB}@cd \$(dir \$<); tar -cf - \$(subst .tar.bz2,,\$@) | bzip2 > ../\$@
-
-%\$(BUILD_SFX).zip: %/.done
-${TAB}@echo "\$(subst .zip,,\$@): zip"
-${TAB}@rm -f \$@; cd \$(dir \$<); zip -rq ../\$@ \$(subst .zip,,\$@)
-
-logs/%\$(BUILD_SFX).log.bz2: %/.done
-${TAB}@echo "\$(subst .log.bz2,,\$(notdir \$@)): tarlog"
-${TAB}@mkdir -p logs
-${TAB}@cat \$< | bzip2 > \$@
-
-%/.done:
-${TAB}@mkdir -p \$(dir \$@)
-${TAB}@echo "\$(dir \$@): configure \$(CONFIG_OPTS) \$(EXTRA_PATH)"
-${TAB}@cd \$(dir \$@); export PATH=\$\$PATH\$(EXTRA_PATH); ../\$(SRC_ROOT)/configure \$(CONFIG_OPTS) >makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): make"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): test install"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install >>makelog.txt 2>&1
-${TAB}@cd \$(dir \$@)/dist/build; PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): install"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install DIST_DIR=\$(TGT) >>makelog.txt 2>&1
-${TAB}@touch \$@
-
-#include release-deps.mk
-EOF
-
-#[ -f release-deps.mk ] || \
-#    find ${self%/*} -name .git -prune -o -type f -print0 \
-#    | xargs -0 -n1 echo \
-#    | sed -e 's; ;\\ ;g' | awk '{print "$(TGT)/.done: "$0}' > release-deps.mk
-
-build_config_list() {
-    for codec in $CODEC_LIST; do
-        for arch in $ARCH_LIST; do
-            if [ -n "$OS_LIST" ]; then
-                for os in $OS_LIST; do
-                    CONFIGS="$CONFIGS vpx-${codec}-${arch}-${os}"
-                done
-            else
-                CONFIGS="$CONFIGS vpx-${codec}-${arch}"
-            fi
-        done
-    done
-}
-
-CODEC_LIST="vp8 vp8cx vp8dx"
-case `uname` in
-    Linux*)
-        ARCH_LIST="x86 x86_64"
-        OS_LIST="linux"
-        build_config_list
-        ARCH_LIST="armv5te armv6 armv7"
-        OS_LIST="linux-gcc"
-
-        ;;
-    CYGWIN*)
-        TAR_SFX=.zip
-        for vs in vs7 vs8; do
-            for arch in x86-win32 x86_64-win64; do
-                for msvcrt in md mt; do
-                    case $vs,$arch in
-                        vs7,x86_64-win64) continue ;;
-                    esac
-                    ARCH_LIST="$ARCH_LIST ${arch}${msvcrt}-${vs}"
-                done
-            done
-        done
-        ;;
-    Darwin*)
-        ARCH_LIST="universal"
-        OS_LIST="darwin8 darwin9"
-        ;;
-    sun_os*)
-        ARCH_LIST="x86 x86_64"
-        OS_LIST="solaris"
-        ;;
-esac
-build_config_list
-
-TAR_SFX=${TAR_SFX:-.tar.bz2}
-ARM_TOOLCHAIN=/usr/local/google/csl-2009q3-67
-for cfg in $CONFIGS; do
-    full_cfg=$cfg
-    cfg=${cfg#vpx-}
-    opts=
-    rm -f makelog.txt
-
-    case $cfg in
-        src-*)  opts="$opts --enable-codec-srcs"
-                cfg=${cfg#src-}
-                ;;
-        eval-*) opts="$opts --enable-eval-limit"
-                cfg=${cfg#src-}
-                ;;
-    esac
-
-    case $cfg in
-        #
-        # Linux
-        #
-        *x86-linux)
-            opts="$opts --target=x86-linux-gcc" ;;
-        *x86_64-linux)
-            opts="$opts --target=x86_64-linux-gcc" ;;
-        *arm*-linux-gcc)
-            armv=${cfg##*armv}
-            armv=${armv%%-*}
-            opts="$opts --target=armv${armv}-linux-gcc" ;;
-        *arm*-linux-rvct)
-            armv=${cfg##*armv}
-            armv=${armv%%-*}
-            opts="$opts --target=armv${armv}-linux-rvct"
-            opts="$opts --libc=${ARM_TOOLCHAIN}/arm-none-linux-gnueabi/libc" ;;
-
-
-        #
-        # Windows
-        #
-        # need --enable-debug-libs for now until we're smarter about
-        # building the debug/release from the customer installed
-        # environment
-        *-x86-win32*-vs*)
-            opts="$opts --target=x86-win32-vs${cfg##*-vs} --enable-debug-libs";;
-        *-x86_64-win64*-vs8)
-            opts="$opts --target=x86_64-win64-vs8 --enable-debug-libs" ;;
-
-        #
-        # Darwin
-        #
-        *-universal-darwin*)
-            opts="$opts --target=universal-darwin${cfg##*-darwin}-gcc" ;;
-
-        #
-        # Solaris
-        #
-        *x86-solaris)
-            opts="$opts --target=x86-solaris-gcc" ;;
-        *x86_64-solaris)
-            opts="$opts --target=x86_64-solaris-gcc" ;;
-    esac
-
-    case $cfg in
-        *x86-linux | *x86-solaris) opts="$opts --enable-pic" ;;
-    esac
-
-    case $cfg in
-        *-win[36][24]mt*)  opts="$opts --enable-static-msvcrt" ;;
-        *-win[36][24]md*)  opts="$opts --disable-static-msvcrt" ;;
-    esac
-
-    opts="$opts --disable-codecs"
-    case $cfg in
-        vp8*) opts="$opts --enable-vp8" ;;
-    esac
-    case $cfg in
-        *cx-*) opts="${opts}-encoder" ;;
-        *dx-*) opts="${opts}-decoder" ;;
-    esac
-    opts="$opts --enable-postproc"
-
-    [ "x${clean}" = "xyes" ] \
-        && rm -rf ${full_cfg}${BUILD_SFX}${TAR_SFX} \
-        && rm -rf logs/${full_cfg}${BUILD_SFX}.log.bz2
-
-    TGT=${full_cfg}${BUILD_SFX}
-    BUILD_TARGETS="logs/${TGT}.log.bz2 ${TGT}${TAR_SFX}"
-    echo "${BUILD_TARGETS}: CONFIG_OPTS=$opts" >>release.mk
-    echo "${BUILD_TARGETS}: TGT=${TGT}" >>release.mk
-    case $cfg in
-        *-arm*-linux-*)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:${ARM_TOOLCHAIN}/bin/" >>release.mk ;;
-        *-vs7)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ .NET\ 2003/Common7/IDE" >>release.mk ;;
-        *-vs8)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ 8/Common7/IDE" >>release.mk ;;
-    esac
-    MAKE_TGTS="$MAKE_TGTS ${TGT}${TAR_SFX} logs/${TGT}.log.bz2"
-done
-
-
-${MAKE:-make} ${jopt:--j3} -f release.mk  \
-    SRC_ROOT=${self%/*} BUILD_SFX=${BUILD_SFX} ${MAKE_TGTS}
diff --git a/solution.mk b/solution.mk

index 8e852ec5df1e02a8ef096abc0cf7080961f6f55d..6d2c08d060c6692a4adafc70531f1d7d5f5b325a 100644 (file)
--- a/solution.mk
+++ b/solution.mk
@@ -22,7 +22,7 @@ else
  vpx.sln: $(wildcard *.vcproj)
         @echo "    [CREATE] $@"
         $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
-            $(if $(filter %vpx.vcproj,$^),--dep=ivfdec:vpx) \
+            $(if $(filter %vpx.vcproj,$^),--dep=vpxdec:vpx) \
              $(if $(filter %vpx.vcproj,$^),--dep=xma:vpx) \
              --ver=$(CONFIG_VS_VERSION)\
              --target=$(TOOLCHAIN)\
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c

new file mode 100644 (file)

index 0000000..fe62fae
--- /dev/null
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -0,0 +1,134 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "g_common.h"
+#include "pragmas.h"
+#include "subpixel.h"
+#include "loopfilter.h"
+#include "recon.h"
+#include "idct.h"
+#include "onyxc_int.h"
+
+extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
+
+extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
+
+void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+    int flags = arm_cpu_caps();
+    int has_edsp = flags & HAS_EDSP;
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+    rtcd->flags = flags;
+
+    /* Override default functions with fastest ones for this CPU. */
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_v6;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_v6;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
+        rtcd->recon.recon       = vp8_recon_b_armv6;
+        rtcd->recon.recon2      = vp8_recon2b_armv6;
+        rtcd->recon.recon4      = vp8_recon4b_armv6;
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (has_neon)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
+        rtcd->recon.recon       = vp8_recon_b_neon;
+        rtcd->recon.recon2      = vp8_recon2b_neon;
+        rtcd->recon.recon4      = vp8_recon4b_neon;
+    }
+#endif
+
+#endif
+
+#if HAVE_ARMV6
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_media)
+#endif
+    {
+        vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
+        vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
+    }
+#endif
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_neon)
+#endif
+    {
+        vp8_build_intra_predictors_mby_ptr =
+         vp8_build_intra_predictors_mby_neon;
+        vp8_build_intra_predictors_mby_s_ptr =
+         vp8_build_intra_predictors_mby_s_neon;
+    }
+#endif
+}
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h

index f28d7f649ea0c541c2b62c8f4c524db6df9b317f..8b8d179172b841f1b2be3226c1756e5201ca1b25 100644 (file)
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -19,6 +19,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
  extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
  extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_idct_idct1
  #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
  
@@ -34,6 +35,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
  #undef  vp8_idct_iwalsh16
  #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_idct(vp8_short_idct4x4llm_1_neon);
@@ -42,6 +44,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
  extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
  extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_idct_idct1
  #define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
  
@@ -57,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
  #undef  vp8_idct_iwalsh16
  #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h

index 6c3628ae939e7138e9d32ca0213d4f142ea65726..cd62207d7012961cba8b9aa7109d266cc4b9d609 100644 (file)
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -22,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
  extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
  extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_lf_normal_mb_v
  #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
  
@@ -46,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
  #undef  vp8_lf_simple_b_h
  #define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
@@ -57,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
  extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
  extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_lf_normal_mb_v
  #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
  
@@ -81,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
  #undef  vp8_lf_simple_b_h
  #define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm

new file mode 100644 (file)

index 0000000..bf0c357
--- /dev/null
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -0,0 +1,409 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_loop_filter_horizontal_edge_y_neon|
+    EXPORT  |vp8_loop_filter_horizontal_edge_uv_neon|
+    EXPORT  |vp8_loop_filter_vertical_edge_y_neon|
+    EXPORT  |vp8_loop_filter_vertical_edge_uv_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+;                                             const signed char *flimit,
+;                                             const signed char *limit,
+;                                             const signed char *thresh,
+;                                             int count)
+; r0    unsigned char *src
+; r1    int pitch
+; r2    const signed char *flimit
+; r3    const signed char *limit
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_loop_filter_horizontal_edge_y_neon| PROC
+    stmdb       sp!, {lr}
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    sub         r2, r0, r1, lsl #2          ; move src pointer back by 4 lines
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    vld1.u8     {q3}, [r2], r1              ; p3
+    vld1.u8     {q4}, [r2], r1              ; p2
+    vld1.u8     {q5}, [r2], r1              ; p1
+    vld1.u8     {q6}, [r2], r1              ; p0
+    vld1.u8     {q7}, [r2], r1              ; q0
+    vld1.u8     {q8}, [r2], r1              ; q1
+    vld1.u8     {q9}, [r2], r1              ; q2
+    vld1.u8     {q10}, [r2]                 ; q3
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    sub         r0, r0, r1, lsl #1
+
+    bl          vp8_loop_filter_neon
+
+    vst1.u8     {q5}, [r0], r1              ; store op1
+    vst1.u8     {q6}, [r0], r1              ; store op0
+    vst1.u8     {q7}, [r0], r1              ; store oq0
+    vst1.u8     {q8}, [r0], r1              ; store oq1
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
+
+; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
+;                                              const signed char *flimit,
+;                                              const signed char *limit,
+;                                              const signed char *thresh,
+;                                              unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_loop_filter_horizontal_edge_uv_neon| PROC
+    stmdb       sp!, {lr}
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    ldr         r2, [sp, #8]                ; load v ptr
+
+    sub         r3, r0, r1, lsl #2          ; move u pointer back by 4 lines
+    vld1.u8     {d6}, [r3], r1              ; p3
+    vld1.u8     {d8}, [r3], r1              ; p2
+    vld1.u8     {d10}, [r3], r1             ; p1
+    vld1.u8     {d12}, [r3], r1             ; p0
+    vld1.u8     {d14}, [r3], r1             ; q0
+    vld1.u8     {d16}, [r3], r1             ; q1
+    vld1.u8     {d18}, [r3], r1             ; q2
+    vld1.u8     {d20}, [r3]                 ; q3
+
+    ldr         r3, [sp, #4]                ; load thresh pointer
+
+    sub         r12, r2, r1, lsl #2         ; move v pointer back by 4 lines
+    vld1.u8     {d7}, [r12], r1             ; p3
+    vld1.u8     {d9}, [r12], r1             ; p2
+    vld1.u8     {d11}, [r12], r1            ; p1
+    vld1.u8     {d13}, [r12], r1            ; p0
+    vld1.u8     {d15}, [r12], r1            ; q0
+    vld1.u8     {d17}, [r12], r1            ; q1
+    vld1.u8     {d19}, [r12], r1            ; q2
+    vld1.u8     {d21}, [r12]                ; q3
+
+    vld1.s8     {d4[], d5[]}, [r3]          ; thresh
+
+    bl          vp8_loop_filter_neon
+
+    sub         r0, r0, r1, lsl #1
+    sub         r2, r2, r1, lsl #1
+
+    vst1.u8     {d10}, [r0], r1             ; store u op1
+    vst1.u8     {d11}, [r2], r1             ; store v op1
+    vst1.u8     {d12}, [r0], r1             ; store u op0
+    vst1.u8     {d13}, [r2], r1             ; store v op0
+    vst1.u8     {d14}, [r0], r1             ; store u oq0
+    vst1.u8     {d15}, [r2], r1             ; store v oq0
+    vst1.u8     {d16}, [r0]                 ; store u oq1
+    vst1.u8     {d17}, [r2]                 ; store v oq1
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
+
+; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+;                                           const signed char *flimit,
+;                                           const signed char *limit,
+;                                           const signed char *thresh,
+;                                           int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_loop_filter_vertical_edge_y_neon| PROC
+    stmdb       sp!, {lr}
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    sub         r2, r0, #4                  ; src ptr back by 4 columns
+    sub         r0, r0, #2                  ; dst ptr
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    vld1.u8     {d6}, [r2], r1              ; load first 8-line src data
+    vld1.u8     {d8}, [r2], r1
+    vld1.u8     {d10}, [r2], r1
+    vld1.u8     {d12}, [r2], r1
+    vld1.u8     {d14}, [r2], r1
+    vld1.u8     {d16}, [r2], r1
+    vld1.u8     {d18}, [r2], r1
+    vld1.u8     {d20}, [r2], r1
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+
+    vld1.u8     {d7}, [r2], r1              ; load second 8-line src data
+    vld1.u8     {d9}, [r2], r1
+    vld1.u8     {d11}, [r2], r1
+    vld1.u8     {d13}, [r2], r1
+    vld1.u8     {d15}, [r2], r1
+    vld1.u8     {d17}, [r2], r1
+    vld1.u8     {d19}, [r2], r1
+    vld1.u8     {d21}, [r2]
+
+    ;transpose to 8x16 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    bl          vp8_loop_filter_neon
+
+    vswp        d12, d11
+    vswp        d16, d13
+    vswp        d14, d12
+    vswp        d16, d15
+
+    ;store op1, op0, oq0, oq1
+    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
+    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
+    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
+    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
+    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
+    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
+    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
+    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
+    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r0]
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
+
+; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
+;                                            const signed char *flimit,
+;                                            const signed char *limit,
+;                                            const signed char *thresh,
+;                                            unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_loop_filter_vertical_edge_uv_neon| PROC
+    stmdb       sp!, {lr}
+    sub         r12, r0, #4                  ; move u pointer back by 4 columns
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+
+    ldr         r2, [sp, #8]                ; load v ptr
+
+    vld1.u8     {d6}, [r12], r1              ;load u data
+    vld1.u8     {d8}, [r12], r1
+    vld1.u8     {d10}, [r12], r1
+    vld1.u8     {d12}, [r12], r1
+    vld1.u8     {d14}, [r12], r1
+    vld1.u8     {d16}, [r12], r1
+    vld1.u8     {d18}, [r12], r1
+    vld1.u8     {d20}, [r12]
+
+    sub         r3, r2, #4                  ; move v pointer back by 4 columns
+    vld1.u8     {d7}, [r3], r1              ;load v data
+    vld1.u8     {d9}, [r3], r1
+    vld1.u8     {d11}, [r3], r1
+    vld1.u8     {d13}, [r3], r1
+    vld1.u8     {d15}, [r3], r1
+    vld1.u8     {d17}, [r3], r1
+    vld1.u8     {d19}, [r3], r1
+    vld1.u8     {d21}, [r3]
+
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    ;transpose to 8x16 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+
+    bl          vp8_loop_filter_neon
+
+    sub         r0, r0, #2
+    sub         r2, r2, #2
+
+    vswp        d12, d11
+    vswp        d16, d13
+    vswp        d14, d12
+    vswp        d16, d15
+
+    ;store op1, op0, oq0, oq1
+    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
+    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
+    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
+    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
+    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
+    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
+    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
+    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0]
+    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
+
+; void vp8_loop_filter_neon();
+; This is a helper function for the loopfilters. The individual functions do the
+; necessary load, transpose (if necessary) and store.
+
+; r0-r3 PRESERVE
+; q0    flimit
+; q1    limit
+; q2    thresh
+; q3    p3
+; q4    p2
+; q5    p1
+; q6    p0
+; q7    q0
+; q8    q1
+; q9    q2
+; q10   q3
+|vp8_loop_filter_neon| PROC
+    ldr         r12, _lf_coeff_
+
+    ; vp8_filter_mask
+    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
+    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
+    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
+    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
+    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
+    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
+    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
+
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q4
+    vmax.u8     q15, q11, q12
+
+    ; vp8_hevmask
+    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
+    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
+
+    vadd.u8     q0, q0, q0                  ; flimit * 2
+    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
+    vcge.u8     q15, q1, q15
+
+    vabd.u8     q2, q5, q8                  ; a = abs(p1 - q1)
+    vqadd.u8    q9, q9, q9                  ; b = abs(p0 - q0) * 2
+    vshr.u8     q2, q2, #1                  ; a = a / 2
+    vqadd.u8    q9, q9, q2                  ; a = b + a
+    vcge.u8     q9, q0, q9                  ; (flimit * 2 + limit >= a) * -1
+
+    vld1.u8     {q0}, [r12]!
+
+    ; vp8_filter() function
+    ; convert to signed
+    veor        q7, q7, q0                  ; qs0
+    veor        q6, q6, q0                  ; ps0
+    veor        q5, q5, q0                  ; ps1
+    veor        q8, q8, q0                  ; qs1
+
+    vld1.u8     {q10}, [r12]!
+
+    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
+    vsubl.s8    q11, d15, d13
+
+    vmovl.u8    q4, d20
+
+    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
+    vorr        q14, q13, q14               ; vp8_hevmask
+
+    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
+    vmul.i16    q11, q11, q4
+
+    vand        q1, q1, q14                 ; vp8_filter &= hev
+    vand        q15, q15, q9                ; vp8_filter_mask
+
+    vaddw.s8    q2, q2, d2
+    vaddw.s8    q11, q11, d3
+
+    vld1.u8     {q9}, [r12]!
+
+    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    vqmovn.s16  d2, q2
+    vqmovn.s16  d3, q11
+    vand        q1, q1, q15                 ; vp8_filter &= mask
+
+    vqadd.s8    q2, q1, q10                 ; Filter2 = clamp(vp8_filter+3)
+    vqadd.s8    q1, q1, q9                  ; Filter1 = clamp(vp8_filter+4)
+    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
+    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
+
+    vqadd.s8    q11, q6, q2                 ; u = clamp(ps0 + Filter2)
+    vqsub.s8    q10, q7, q1                 ; u = clamp(qs0 - Filter1)
+
+    ; outer tap adjustments: ++vp8_filter >> 1
+    vrshr.s8    q1, q1, #1
+    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
+
+    vqadd.s8    q13, q5, q1                 ; u = clamp(ps1 + vp8_filter)
+    vqsub.s8    q12, q8, q1                 ; u = clamp(qs1 - vp8_filter)
+
+    veor        q5, q13, q0                 ; *op1 = u^0x80
+    veor        q6, q11, q0                 ; *op0 = u^0x80
+    veor        q7, q10, q0                 ; *oq0 = u^0x80
+    veor        q8, q12, q0                 ; *oq1 = u^0x80
+
+    bx          lr
+    ENDP        ; |vp8_loop_filter_neon|
+
+    AREA    loopfilter_dat, DATA, READONLY
+_lf_coeff_
+    DCD     lf_coeff
+lf_coeff
+    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
+    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
+    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
+    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
+
+    END
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm

deleted file mode 100644 (file)

index 23ace0f..0000000
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ /dev/null
@@ -1,178 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_horizontal_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-
-|vp8_loop_filter_horizontal_edge_uv_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move u pointer down by 4 lines
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-
-    ldr         r2, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r2, r2, r1, lsl #2          ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r0], r1              ; p3
-    vld1.u8     {d7}, [r2], r1              ; p3
-    vld1.u8     {d8}, [r0], r1              ; p2
-    vld1.u8     {d9}, [r2], r1              ; p2
-    vld1.u8     {d10}, [r0], r1             ; p1
-    vld1.u8     {d11}, [r2], r1             ; p1
-    vld1.u8     {d12}, [r0], r1             ; p0
-    vld1.u8     {d13}, [r2], r1             ; p0
-    vld1.u8     {d14}, [r0], r1             ; q0
-    vld1.u8     {d15}, [r2], r1             ; q0
-    vld1.u8     {d16}, [r0], r1             ; q1
-    vld1.u8     {d17}, [r2], r1             ; q1
-    vld1.u8     {d18}, [r0], r1             ; q2
-    vld1.u8     {d19}, [r2], r1             ; q2
-    vld1.u8     {d20}, [r0], r1             ; q3
-    vld1.u8     {d21}, [r2], r1             ; q3
-
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _lfhuv_coeff_
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15                ; (max  > limit) * -1
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #2
-    sub         r0, r0, r1, lsl #1
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-
-    sub         r2, r2, r1, lsl #2
-    sub         r2, r2, r1, lsl #1
-    ;;
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-    ;
-
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-    ;
-
-    vst1.u8     {d10}, [r0], r1             ; store u op1
-    vst1.u8     {d11}, [r2], r1             ; store v op1
-    vst1.u8     {d12}, [r0], r1             ; store u op0
-    vst1.u8     {d13}, [r2], r1             ; store v op0
-    vst1.u8     {d14}, [r0], r1             ; store u oq0
-    vst1.u8     {d15}, [r2], r1             ; store v oq0
-    vst1.u8     {d16}, [r0], r1             ; store u oq1
-    vst1.u8     {d17}, [r2], r1             ; store v oq1
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
-
-;-----------------
-    AREA    hloopfilteruv_dat, DATA, READWRITE          ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_lfhuv_coeff_
-    DCD     lfhuv_coeff
-lfhuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm

deleted file mode 100644 (file)

index e1896e4..0000000
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ /dev/null
@@ -1,161 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_horizontal_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load (replicated to every byte lane) is needed
-;for flimit. The same applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; [sp, #0]  const signed char *thresh,
-; next stack slot: int count -- unused, never read here
-
-|vp8_loop_filter_horizontal_edge_y_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move src pointer back (up) by 4 lines
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {q3}, [r0], r1              ; p3
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-    vld1.u8     {q4}, [r0], r1              ; p2
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {q5}, [r0], r1              ; p1
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {q6}, [r0], r1              ; p0
-    ldr         r12, _lfhy_coeff_           ; r12 = address of the lfhy_coeff table
-    vld1.u8     {q7}, [r0], r1              ; q0
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vld1.u8     {q8}, [r0], r1              ; q1
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vld1.u8     {q9}, [r0], r1              ; q2
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vld1.u8     {q10}, [r0], r1             ; q3
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12               ; max(abs(p3 - p2), abs(p2 - p1))
-    vmax.u8     q12, q13, q14               ; max(abs(p1 - p0), abs(q1 - q0))
-    vmax.u8     q3, q3, q4                  ; max(abs(q2 - q1), abs(q3 - q2))
-    vmax.u8     q15, q11, q12               ; max of the first four differences
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3                ; max of all six neighbour differences
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15                ; (limit >= max neighbour difference)*-1
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2 (saturating)
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 (saturating)
-    vcge.u8     q9, q0, q9                  ; (flimit*2 + limit >= abs(p0 - q0)*2 + abs(p1-q1)/2)*-1
-
-    vld1.u8     {q0}, [r12]!                ; q0 = 0x80 in every byte (first table row)
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!               ; q10 = 0x03 in every byte (second table row)
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0), widened to 16 bits, lanes 0-7
-    vsubl.s8    q11, d15, d13               ; same for lanes 8-15
-
-    vmovl.u8    q4, d20                     ; widen the 3s to 16 bits
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4                ; same for lanes 8-15
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0), still 16-bit
-    vaddw.s8    q11, q11, d3                ; same for lanes 8-15
-
-    vld1.u8     {q9}, [r12]!                ; q9 = 0x04 in every byte (third table row)
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11                     ; same for lanes 8-15
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-
-    ;calculate output
-    vqadd.s8    q11, q6, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #2          ; r0 -= 4 * pitch
-    sub         r0, r0, r1, lsl #1          ; r0 -= 2 * pitch: r0 -> p1 row
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-    ;
-    add         r2, r1, r0                  ; r2 -> p0 row
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    add         r3, r2, r1                  ; r3 -> q0 row
-
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-
-    add         r12, r3, r1                 ; r12 -> q1 row
-
-    vst1.u8     {q5}, [r0]                  ; store op1
-    vst1.u8     {q6}, [r2]                  ; store op0
-    vst1.u8     {q7}, [r3]                  ; store oq0
-    vst1.u8     {q8}, [r12]                 ; store oq1
-
-    bx          lr                          ; return
-    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
-
-;-----------------
-    AREA    hloopfiltery_dat, DATA, READWRITE           ;read/write by default
-;Data section hloopfiltery_dat. _lfhy_coeff_ is one word holding the address of lfhy_coeff.
-;lfhy_coeff is 16 words: four rows of four DCDs, each row repeating one byte value 16 times.
-;Row addresses: lfhy_coeff, lfhy_coeff+16, lfhy_coeff+32, lfhy_coeff+48 (the routine in this file loads the first three).
-_lfhy_coeff_
-    DCD     lfhy_coeff
-lfhy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm

deleted file mode 100644 (file)

index a9c2d12..0000000
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ /dev/null
@@ -1,203 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_vertical_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load (replicated to every byte lane) is needed
-;for flimit. The same applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; [sp, #0]  const signed char *thresh,
-; [sp, #4]  unsigned char *v
-
-|vp8_loop_filter_vertical_edge_uv_neon| PROC
-    sub         r0, r0, #4          ; move u pointer back by 4 columns
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-
-    ldr         r2, [sp, #4]                ; load v ptr (r2 is free: flimit is already loaded)
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r2, r2, #4          ; move v pointer back by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ;load u data (8 rows of 8 bytes into the even d registers)
-    vld1.u8     {d7}, [r2], r1              ;load v data (8 rows of 8 bytes into the odd d registers)
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r2], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r2], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r2], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r2], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r2], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r2], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r2], r1
-
-    ;transpose to 8x16 matrix: afterwards q3..q10 hold columns p3, p2, p1, p0, q0, q1, q2, q3
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _vlfuv_coeff_          ; r12 = address of the vlfuv_coeff table
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12               ; max(abs(p3 - p2), abs(p2 - p1))
-    vmax.u8     q12, q13, q14               ; max(abs(p1 - p0), abs(q1 - q0))
-    vmax.u8     q3, q3, q4                  ; max(abs(q2 - q1), abs(q3 - q2))
-    vmax.u8     q15, q11, q12               ; max of the first four differences
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3                ; max of all six neighbour differences
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15                ; (limit >= max neighbour difference)*-1
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2 (saturating)
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 (saturating)
-    vcge.u8     q9, q0, q9                  ; (flimit*2 + limit >= abs(p0 - q0)*2 + abs(p1-q1)/2)*-1
-
-    vld1.u8     {q0}, [r12]!                ; q0 = 0x80 in every byte (first table row)
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!               ; q10 = 0x03 in every byte (second table row)
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0), widened to 16 bits, u lanes
-    vsubl.s8    q11, d15, d13               ; same for the v lanes
-
-    vmovl.u8    q4, d20                     ; widen the 3s to 16 bits
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4                ; same for the v lanes
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0), still 16-bit
-    vaddw.s8    q11, q11, d3                ; same for the v lanes
-
-    vld1.u8     {q9}, [r12]!                ; q9 = 0x04 in every byte (third table row)
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11                     ; same for the v lanes
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #3          ; rewind u pointer by the 8 rows loaded
-    add         r0, r0, #2                  ; +2 columns: r0 -> op1 column of u row 0
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-
-    sub         r2, r2, r1, lsl #3          ; rewind v pointer by the 8 rows loaded
-    add         r2, r2, #2                  ; +2 columns: r2 -> op1 column of v row 0
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-
-    vswp        d12, d11                    ; d11 = op0 (u), d12 = op1 (v)
-    vswp        d16, d13                    ; d13 = oq1 (u), d16 = op0 (v)
-    vswp        d14, d12                    ; d12 = oq0 (u), d14 = op1 (v)
-    vswp        d16, d15                    ; d15 = op0 (v), d16 = oq0 (v): d10-d13 = u, d14-d17 = v
-
-    ;store op1, op0, oq0, oq1 (4 bytes per row; lane n of each register is row n)
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2], r1
-
-    bx          lr                          ; return
-    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
-
-;-----------------
-    AREA    vloopfilteruv_dat, DATA, READWRITE          ;read/write by default
-;Data section vloopfilteruv_dat. _vlfuv_coeff_ is one word holding the address of vlfuv_coeff.
-;vlfuv_coeff is 16 words: four rows of four DCDs, each row repeating one byte value 16 times.
-;Row addresses: vlfuv_coeff, vlfuv_coeff+16, vlfuv_coeff+32, vlfuv_coeff+48 (the routine in this file loads the first three).
-_vlfuv_coeff_
-    DCD     vlfuv_coeff
-vlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm

deleted file mode 100644 (file)

index 64a49bb..0000000
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ /dev/null
@@ -1,207 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_vertical_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load (replicated to every byte lane) is needed
-;for flimit. The same applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; [sp, #0]  const signed char *thresh,
-; next stack slot: int count -- unused, never read here
-
-|vp8_loop_filter_vertical_edge_y_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer back by 4 columns
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-    vld1.u8     {d8}, [r0], r1
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {d10}, [r0], r1
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {d12}, [r0], r1
-    ldr         r12, _vlfy_coeff_           ; r12 = address of the vlfy_coeff table
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d20}, [r0], r1
-
-    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
-    vld1.u8     {d9}, [r0], r1
-    vld1.u8     {d11}, [r0], r1
-    vld1.u8     {d13}, [r0], r1
-    vld1.u8     {d15}, [r0], r1
-    vld1.u8     {d17}, [r0], r1
-    vld1.u8     {d19}, [r0], r1
-    vld1.u8     {d21}, [r0], r1
-
-    ;transpose to 8x16 matrix: afterwards q3..q10 hold columns p3, p2, p1, p0, q0, q1, q2, q3
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12               ; max(abs(p3 - p2), abs(p2 - p1))
-    vmax.u8     q12, q13, q14               ; max(abs(p1 - p0), abs(q1 - q0))
-    vmax.u8     q3, q3, q4                  ; max(abs(q2 - q1), abs(q3 - q2))
-    vmax.u8     q15, q11, q12               ; max of the first four differences
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3                ; max of all six neighbour differences
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15                ; (limit >= max neighbour difference)*-1
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2 (saturating)
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 (saturating)
-    vcge.u8     q9, q0, q9                  ; (flimit*2 + limit >= abs(p0 - q0)*2 + abs(p1-q1)/2)*-1
-
-    vld1.u8     {q0}, [r12]!                ; q0 = 0x80 in every byte (first table row)
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!               ; q10 = 0x03 in every byte (second table row)
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0), widened to 16 bits, rows 0-7
-    vsubl.s8    q11, d15, d13               ; same for rows 8-15
-
-    vmovl.u8    q4, d20                     ; widen the 3s to 16 bits
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4                ; same for rows 8-15
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0), still 16-bit
-    vaddw.s8    q11, q11, d3                ; same for rows 8-15
-
-    vld1.u8     {q9}, [r12]!                ; q9 = 0x04 in every byte (third table row)
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11                     ; same for rows 8-15
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #4          ; rewind src pointer by the 16 rows loaded
-    add         r0, r0, #2                  ; +2 columns: r0 -> op1 column of row 0
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-    add         r2, r0, r1                  ; r2 -> row 1
-    ;
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-    add         r3, r2, r1                  ; r3 -> row 2
-    ;
-    vswp        d12, d11                    ; d11 = op0 (rows 0-7), d12 = op1 (rows 8-15)
-    vswp        d16, d13                    ; d13 = oq1 (rows 0-7), d16 = op0 (rows 8-15)
-    add         r12, r3, r1                 ; r12 -> row 3
-    vswp        d14, d12                    ; d12 = oq0 (rows 0-7), d14 = op1 (rows 8-15)
-    vswp        d16, d15                    ; d15 = op0 (rows 8-15), d16 = oq0 (rows 8-15)
-
-    ;store op1, op0, oq0, oq1 (4 bytes per row; d10-d13 cover rows 0-7, d14-d17 rows 8-15)
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0]
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r2]
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r3]
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
-    add         r0, r12, r1                 ; r0 -> row 5 (r12 now -> row 4)
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r12]
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
-    add         r2, r0, r1                  ; r2 -> row 7 (r0 now -> row 6)
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0]
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r2], r1
-    add         r3, r2, r1                  ; r3 -> row 9 (r2 now -> row 8)
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2]
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r3], r1
-    add         r12, r3, r1                 ; r12 -> row 11 (r3 now -> row 10)
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r3]
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
-    add         r0, r12, r1                 ; r0 -> row 13 (r12 now -> row 12)
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r12]
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
-    add         r2, r0, r1                  ; r2 -> row 15 (r0 now -> row 14)
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0]
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
-
-    bx          lr                          ; return
-    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
-
-;-----------------
-    AREA    vloopfiltery_dat, DATA, READWRITE           ;read/write by default
-;Data section vloopfiltery_dat. _vlfy_coeff_ is one word holding the address of vlfy_coeff.
-;vlfy_coeff is 16 words: four rows of four DCDs, each row repeating one byte value 16 times.
-;Row addresses: vlfy_coeff, vlfy_coeff+16, vlfy_coeff+32, vlfy_coeff+48 (the routine in this file loads the first three).
-_vlfy_coeff_
-    DCD     vlfy_coeff
-vlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm

new file mode 100644 (file)

index 0000000..255dd56
--- /dev/null
+++ b/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -0,0 +1,519 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_mbloop_filter_horizontal_edge_y_neon|
+    EXPORT  |vp8_mbloop_filter_horizontal_edge_uv_neon|
+    EXPORT  |vp8_mbloop_filter_vertical_edge_y_neon|
+    EXPORT  |vp8_mbloop_filter_vertical_edge_uv_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+;                                               const signed char *flimit,
+;                                               const signed char *limit,
+;                                               const signed char *thresh,
+;                                               int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit (not read here; r2 is left untouched for the helper call),
+; r3    const signed char *limit,
+; sp    const signed char *thresh (offset as at entry, before lr is pushed),
+; sp+4  int count (unused)
+|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r0, r0, r1, lsl #2          ; move src pointer back (up) by 4 lines
+    ldr         r12, [sp, #4]               ; load thresh pointer (+4 because lr was just pushed)
+
+    vld1.u8     {q3}, [r0], r1              ; p3
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    vld1.u8     {q4}, [r0], r1              ; p2
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    vld1.u8     {q5}, [r0], r1              ; p1
+    vld1.u8     {q6}, [r0], r1              ; p0
+    vld1.u8     {q7}, [r0], r1              ; q0
+    vld1.u8     {q8}, [r0], r1              ; q1
+    vld1.u8     {q9}, [r0], r1              ; q2
+    vld1.u8     {q10}, [r0], r1             ; q3
+
+    bl          vp8_mbloop_filter_neon      ; shared filter core; presumably returns op2..oq2 in q4..q9
+
+    sub         r0, r0, r1, lsl #3          ; rewind by the 8 rows loaded: r0 -> p3 row
+    add         r0, r0, r1                  ; r0 -> p2 row
+    add         r2, r0, r1                  ; r2 -> p1 row
+    add         r3, r2, r1                  ; r3 -> p0 row
+
+    vst1.u8     {q4}, [r0]                  ; store op2
+    vst1.u8     {q5}, [r2]                  ; store op1
+    vst1.u8     {q6}, [r3], r1              ; store op0
+    add         r12, r3, r1                 ; r12 -> q1 row (r3 now -> q0 row)
+    vst1.u8     {q7}, [r3]                  ; store oq0
+    vst1.u8     {q8}, [r12], r1             ; store oq1
+    vst1.u8     {q9}, [r12]             ; store oq2
+
+    ldmia       sp!, {pc}                   ; return: pop the saved lr into pc
+    ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
+
+; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
+;                                                const signed char *flimit,
+;                                                const signed char *limit,
+;                                                const signed char *thresh,
+;                                                unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit (not read here; r2 is left untouched for the helper call),
+; r3    const signed char *limit,
+; sp    const signed char *thresh (offset as at entry, before lr is pushed),
+; sp+4  unsigned char *v
+|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r0, r0, r1, lsl #2          ; move u pointer back (up) by 4 lines
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    ldr         r3, [sp, #8]                ; load v ptr (r3 is free: limit is already loaded)
+    ldr         r12, [sp, #4]               ; load thresh pointer (+4 because lr was just pushed)
+    sub         r3, r3, r1, lsl #2          ; move v pointer back (up) by 4 lines
+
+    vld1.u8     {d6}, [r0], r1              ; p3 (u)
+    vld1.u8     {d7}, [r3], r1              ; p3 (v)
+    vld1.u8     {d8}, [r0], r1              ; p2 (u)
+    vld1.u8     {d9}, [r3], r1              ; p2 (v)
+    vld1.u8     {d10}, [r0], r1             ; p1 (u)
+    vld1.u8     {d11}, [r3], r1             ; p1 (v)
+    vld1.u8     {d12}, [r0], r1             ; p0 (u)
+    vld1.u8     {d13}, [r3], r1             ; p0 (v)
+    vld1.u8     {d14}, [r0], r1             ; q0 (u)
+    vld1.u8     {d15}, [r3], r1             ; q0 (v)
+    vld1.u8     {d16}, [r0], r1             ; q1 (u)
+    vld1.u8     {d17}, [r3], r1             ; q1 (v)
+    vld1.u8     {d18}, [r0], r1             ; q2 (u)
+    vld1.u8     {d19}, [r3], r1             ; q2 (v)
+    vld1.u8     {d20}, [r0], r1             ; q3 (u)
+    vld1.u8     {d21}, [r3], r1             ; q3 (v)
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+
+    bl          vp8_mbloop_filter_neon      ; shared filter core; presumably returns op2..oq2 in q4..q9
+
+    sub         r0, r0, r1, lsl #3          ; rewind u by the 8 rows loaded: r0 -> p3 row
+    sub         r3, r3, r1, lsl #3          ; rewind v by the 8 rows loaded: r3 -> p3 row
+
+    add         r0, r0, r1                  ; r0 -> u p2 row
+    add         r3, r3, r1                  ; r3 -> v p2 row
+
+    vst1.u8     {d8}, [r0], r1              ; store u op2
+    vst1.u8     {d9}, [r3], r1              ; store v op2
+    vst1.u8     {d10}, [r0], r1             ; store u op1
+    vst1.u8     {d11}, [r3], r1             ; store v op1
+    vst1.u8     {d12}, [r0], r1             ; store u op0
+    vst1.u8     {d13}, [r3], r1             ; store v op0
+    vst1.u8     {d14}, [r0], r1             ; store u oq0
+    vst1.u8     {d15}, [r3], r1             ; store v oq0
+    vst1.u8     {d16}, [r0], r1             ; store u oq1
+    vst1.u8     {d17}, [r3], r1             ; store v oq1
+    vst1.u8     {d18}, [r0], r1             ; store u oq2
+    vst1.u8     {d19}, [r3], r1             ; store v oq2
+
+    ldmia       sp!, {pc}                   ; return: pop the saved lr into pc
+    ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
+
+; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+;                                             const signed char *flimit,
+;                                             const signed char *limit,
+;                                             const signed char *thresh,
+;                                             int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit (not read here; r2 is left untouched for the helper call),
+; r3    const signed char *limit,
+; sp    const signed char *thresh (offset as at entry, before lr is pushed),
+; sp+4  int count (unused)
+|vp8_mbloop_filter_vertical_edge_y_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r0, r0, #4                  ; move src pointer back by 4 columns
+
+    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
+    ldr         r12, [sp, #4]               ; load thresh pointer (+4 because lr was just pushed)
+    vld1.u8     {d8}, [r0], r1
+    sub         sp, sp, #32                 ; reserve 32 bytes of stack scratch (q3 and q10 are parked there below)
+    vld1.u8     {d10}, [r0], r1
+    vld1.u8     {d12}, [r0], r1
+    vld1.u8     {d14}, [r0], r1
+    vld1.u8     {d16}, [r0], r1
+    vld1.u8     {d18}, [r0], r1
+    vld1.u8     {d20}, [r0], r1
+
+    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
+    vld1.u8     {d9}, [r0], r1
+    vld1.u8     {d11}, [r0], r1
+    vld1.u8     {d13}, [r0], r1
+    vld1.u8     {d15}, [r0], r1
+    vld1.u8     {d17}, [r0], r1
+    vld1.u8     {d19}, [r0], r1
+    vld1.u8     {d21}, [r0], r1
+
+    ;transpose to 8x16 matrix: afterwards q3..q10 hold columns p3, p2, p1, p0, q0, q1, q2, q3
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    mov         r12, sp                     ; r12 = scratch base (sp itself is left unchanged)
+    vst1.u8     {q3}, [r12]!                ; park the p3 column; it is reloaded after the call
+    vst1.u8     {q10}, [r12]!               ; park the q3 column; it is reloaded after the call
+
+    bl          vp8_mbloop_filter_neon      ; shared filter core; presumably returns op2..oq2 in q4..q9
+
+    sub         r0, r0, r1, lsl #4          ; rewind by the 16 rows loaded: r0 -> row 0 (at src - 4)
+
+    add         r2, r0, r1                  ; r2 -> row 1
+
+    add         r3, r2, r1                  ; r3 -> row 2
+
+    vld1.u8     {q3}, [sp]!                 ; reload p3 column; sp += 16
+    vld1.u8     {q10}, [sp]!                ; reload q3 column; sp += 16 (scratch released)
+
+    ;transpose to 16x8 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+    add         r12, r3, r1                 ; r12 -> row 3
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    ;store 16 rows of 8 bytes: reloaded p3, then op2, op1, op0, oq0, oq1, oq2, then reloaded q3
+    vst1.8      {d6}, [r0]
+    vst1.8      {d8}, [r2]
+    vst1.8      {d10}, [r3]
+    vst1.8      {d12}, [r12], r1
+    add         r0, r12, r1                 ; r0 -> row 5 (r12 now -> row 4)
+    vst1.8      {d14}, [r12]
+    vst1.8      {d16}, [r0], r1
+    add         r2, r0, r1                  ; r2 -> row 7 (r0 now -> row 6)
+    vst1.8      {d18}, [r0]
+    vst1.8      {d20}, [r2], r1
+    add         r3, r2, r1                  ; r3 -> row 9 (r2 now -> row 8)
+    vst1.8      {d7}, [r2]
+    vst1.8      {d9}, [r3], r1
+    add         r12, r3, r1                 ; r12 -> row 11 (r3 now -> row 10)
+    vst1.8      {d11}, [r3]
+    vst1.8      {d13}, [r12], r1
+    add         r0, r12, r1                 ; r0 -> row 13 (r12 now -> row 12)
+    vst1.8      {d15}, [r12]
+    vst1.8      {d17}, [r0], r1
+    add         r2, r0, r1                  ; r2 -> row 15 (r0 now -> row 14)
+    vst1.8      {d19}, [r0]
+    vst1.8      {d21}, [r2]
+
+    ldmia       sp!, {pc}                   ; return: pop the saved lr into pc
+    ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
+
+; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
+;                                              const signed char *flimit,
+;                                              const signed char *limit,
+;                                              const signed char *thresh,
+;                                              unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,          (offsets as seen on entry)
+; sp+4  unsigned char *v
+|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
+    stmdb       sp!, {lr}                   ; push lr; stack args are now at sp+4 / sp+8
+    sub         r0, r0, #4                  ; move u pointer back by 4 columns (4 bytes)
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, replicated to every lane of q1
+    ldr         r3, [sp, #8]                ; load v ptr (entry sp+4, plus 4 for pushed lr)
+    ldr         r12, [sp, #4]               ; load thresh pointer (entry sp, plus 4 for pushed lr)
+
+    sub         r3, r3, #4                  ; move v pointer back by 4 columns (4 bytes)
+
+    vld1.u8     {d6}, [r0], r1              ; load u data: 8 bytes per row, low half of q3..q10
+    vld1.u8     {d7}, [r3], r1              ; load v data: 8 bytes per row, high half of q3..q10
+    vld1.u8     {d8}, [r0], r1
+    vld1.u8     {d9}, [r3], r1
+    vld1.u8     {d10}, [r0], r1
+    vld1.u8     {d11}, [r3], r1
+    vld1.u8     {d12}, [r0], r1
+    vld1.u8     {d13}, [r3], r1
+    vld1.u8     {d14}, [r0], r1
+    vld1.u8     {d15}, [r3], r1
+    vld1.u8     {d16}, [r0], r1
+    vld1.u8     {d17}, [r3], r1
+    vld1.u8     {d18}, [r0], r1
+    vld1.u8     {d19}, [r3], r1
+    vld1.u8     {d20}, [r0], r1
+    vld1.u8     {d21}, [r3], r1
+
+    ;transpose to 8x16 matrix (q3..q10 become the p3..q3 inputs of the helper)
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    sub         sp, sp, #32                 ; reserve 32 bytes of stack for q3 and q10
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, replicated to every lane of q2
+    mov         r12, sp
+    vst1.u8     {q3}, [r12]!                ; save q3 (p3): the helper overwrites it
+    vst1.u8     {q10}, [r12]!               ; save q10 (q3): the helper overwrites it
+
+    bl          vp8_mbloop_filter_neon      ; filtered p2..q2 come back in q4..q9
+
+    sub         r0, r0, r1, lsl #3          ; rewind u pointer by the 8 rows loaded above
+    sub         r3, r3, r1, lsl #3          ; rewind v pointer by the 8 rows loaded above
+
+    vld1.u8     {q3}, [sp]!                 ; restore q3 (p3)
+    vld1.u8     {q10}, [sp]!                ; restore q10 (q3); sp is back at the saved lr
+
+    ;transpose to 16x8 matrix (back to one row per d register)
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    ;store all 8 rows of u and v, 8 bytes per row (unfiltered p3/q3 columns are written back too)
+    vst1.8      {d6}, [r0], r1
+    vst1.8      {d7}, [r3], r1
+    vst1.8      {d8}, [r0], r1
+    vst1.8      {d9}, [r3], r1
+    vst1.8      {d10}, [r0], r1
+    vst1.8      {d11}, [r3], r1
+    vst1.8      {d12}, [r0], r1
+    vst1.8      {d13}, [r3], r1
+    vst1.8      {d14}, [r0], r1
+    vst1.8      {d15}, [r3], r1
+    vst1.8      {d16}, [r0], r1
+    vst1.8      {d17}, [r3], r1
+    vst1.8      {d18}, [r0], r1
+    vst1.8      {d19}, [r3], r1
+    vst1.8      {d20}, [r0], r1
+    vst1.8      {d21}, [r3], r1
+
+    ldmia       sp!, {pc}                   ; pop the saved lr straight into pc (return)
+    ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
+
+; void vp8_mbloop_filter_neon()
+; This is a helper function for the macroblock loopfilters. The individual
+; functions do the necessary load, transpose (if necessary), preserve (if
+; necessary) and store.
+
+; TODO:
+; The vertical filter writes p3/q3 back out because two 4 element writes are
+; much simpler than ordering and writing two 3 element sets (or three 2 elements
+; sets, or whichever other combinations are possible).
+; If we can preserve q3 and q10, the vertical filter will be able to avoid
+; storing those values on the stack and reading them back after the filter.
+
+; r0,r1 PRESERVE
+; r2    flimit (pointer; one byte is read and replicated)
+; r3    PRESERVE
+; q1    limit  (overwritten)
+; q2    thresh (overwritten)
+; q3    p3     (overwritten)
+; q4    p2     (in) / op2 (out)
+; q5    p1     (in) / op1 (out)
+; q6    p0     (in) / op0 (out)
+; q7    q0     (in) / oq0 (out)
+; q8    q1     (in) / oq1 (out)
+; q9    q2     (in) / oq2 (out)
+; q10   q3     (overwritten)
+
+|vp8_mbloop_filter_neon| PROC
+    ldr         r12, _mblf_coeff_           ; r12 = address of the mblf_coeff table
+
+    ; vp8_filter_mask
+    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
+    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
+    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
+    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
+    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
+    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
+
+    vmax.u8     q11, q11, q12               ; max(abs(p3 - p2), abs(p2 - p1))
+    vmax.u8     q12, q13, q14               ; max(abs(p1 - p0), abs(q1 - q0))
+    vmax.u8     q3, q3, q0                  ; max(abs(q2 - q1), abs(q3 - q2))
+    vmax.u8     q15, q11, q12               ; max of the first two maxima
+
+    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
+
+    ; vp8_hevmask
+    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh) * -1
+    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh) * -1
+    vmax.u8     q15, q15, q3                ; max of all six neighbouring differences
+
+    vld1.s8     {d4[], d5[]}, [r2]          ; flimit (replaces thresh in q2)
+
+    vld1.u8     {q0}, [r12]!                ; #0x80
+
+    vadd.u8     q2, q2, q2                  ; flimit * 2
+    vadd.u8     q2, q2, q1                  ; flimit * 2 +  limit
+    vcge.u8     q15, q1, q15                ; (limit >= max difference) * -1
+
+    vabd.u8     q1, q5, q8                  ; a = abs(p1 - q1)
+    vqadd.u8    q12, q12, q12               ; b = abs(p0 - q0) * 2
+    vshr.u8     q1, q1, #1                  ; a = a / 2
+    vqadd.u8    q12, q12, q1                ; a = b + a
+    vcge.u8     q12, q2, q12                ; (flimit * 2 + limit >= a) * -1
+
+    ; vp8_filter
+    ; convert to signed
+    veor        q7, q7, q0                  ; qs0
+    veor        q6, q6, q0                  ; ps0
+    veor        q5, q5, q0                  ; ps1
+    veor        q8, q8, q0                  ; qs1
+    veor        q4, q4, q0                  ; ps2
+    veor        q9, q9, q0                  ; qs2
+
+    vorr        q14, q13, q14               ; vp8_hevmask
+
+    vsubl.s8    q2, d14, d12                ; qs0 - ps0 (low half, widened to 16 bits)
+    vsubl.s8    q13, d15, d13               ; qs0 - ps0 (high half, widened to 16 bits)
+
+    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
+
+    vadd.s16    q10, q2, q2                 ; 2 * (qs0 - ps0)
+    vadd.s16    q11, q13, q13
+    vand        q15, q15, q12               ; vp8_filter_mask
+
+    vadd.s16    q2, q2, q10                 ; 3 * (qs0 - ps0)
+    vadd.s16    q13, q13, q11
+
+    vld1.u8     {q12}, [r12]!               ; #3
+
+    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
+    vaddw.s8    q13, q13, d3
+
+    vld1.u8     {q11}, [r12]!               ; #4
+
+    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    vqmovn.s16  d2, q2
+    vqmovn.s16  d3, q13
+
+    vand        q1, q1, q15                 ; vp8_filter &= mask
+
+    vld1.u8     {q15}, [r12]!               ; #63 (in 16-bit lanes)
+    ;
+    vand        q13, q1, q14                ; Filter2 = vp8_filter & hev
+
+    vld1.u8     {d7}, [r12]!                ; #9
+
+    vqadd.s8    q2, q13, q11                ; Filter1 = clamp(Filter2+4)
+    vqadd.s8    q13, q13, q12               ; Filter2 = clamp(Filter2+3)
+
+    vld1.u8     {d6}, [r12]!                ; #18
+
+    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
+    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
+
+    vmov        q10, q15                    ; accumulators for the multiplies below start at 63
+    vmov        q12, q15
+
+    vqsub.s8    q7, q7, q2                  ; qs0 = clamp(qs0 - Filter1)
+
+    vld1.u8     {d5}, [r12]!                ; #27
+
+    vqadd.s8    q6, q6, q13                 ; ps0 = clamp(ps0 + Filter2)
+
+    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
+
+    ; roughly 1/7th difference across boundary
+    ; roughly 2/7th difference across boundary
+    ; roughly 3/7th difference across boundary
+    vmov        q11, q15
+    vmov        q13, q15
+    vmov        q14, q15
+
+    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
+    vmlal.s8    q11, d3, d7
+    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
+    vmlal.s8    q13, d3, d6
+    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
+    vmlal.s8    q15, d3, d5
+    vqshrn.s16  d20, q10, #7                ; u = clamp((63 + Filter2 * 9)>>7)
+    vqshrn.s16  d21, q11, #7
+    vqshrn.s16  d24, q12, #7                ; u = clamp((63 + Filter2 * 18)>>7)
+    vqshrn.s16  d25, q13, #7
+    vqshrn.s16  d28, q14, #7                ; u = clamp((63 + Filter2 * 27)>>7)
+    vqshrn.s16  d29, q15, #7
+
+    vqsub.s8    q11, q9, q10                ; s = clamp(qs2 - u)
+    vqadd.s8    q10, q4, q10                ; s = clamp(ps2 + u)
+    vqsub.s8    q13, q8, q12                ; s = clamp(qs1 - u)
+    vqadd.s8    q12, q5, q12                ; s = clamp(ps1 + u)
+    vqsub.s8    q15, q7, q14                ; s = clamp(qs0 - u)
+    vqadd.s8    q14, q6, q14                ; s = clamp(ps0 + u)
+    veor        q9, q11, q0                 ; *oq2 = s^0x80
+    veor        q4, q10, q0                 ; *op2 = s^0x80
+    veor        q8, q13, q0                 ; *oq1 = s^0x80
+    veor        q5, q12, q0                 ; *op1 = s^0x80
+    veor        q7, q15, q0                 ; *oq0 = s^0x80
+    veor        q6, q14, q0                 ; *op0 = s^0x80
+
+    bx          lr
+    ENDP        ; |vp8_mbloop_filter_neon|
+
+    AREA    mbloopfilter_dat, DATA, READONLY
+_mblf_coeff_                                ; word holding the table address, loaded by vp8_mbloop_filter_neon
+    DCD     mblf_coeff
+mblf_coeff                                  ; constants, in the order the helper reads them
+    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080  ; #0x80 per byte (16 bytes)
+    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303  ; #3 per byte (16 bytes)
+    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404  ; #4 per byte (16 bytes)
+    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f  ; #63 per 16-bit lane (16 bytes)
+    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212  ; #9 per byte (8 bytes), #18 per byte (8 bytes)
+    DCD     0x1b1b1b1b, 0x1b1b1b1b                          ; #27 per byte (8 bytes)
+
+    END
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm

deleted file mode 100644 (file)

index 52ab059..0000000
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ /dev/null
@@ -1,220 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move u pointer down by 4 lines
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    ldr         r3, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-    sub         r3, r3, r1, lsl #2          ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r0], r1              ; p3
-    vld1.u8     {d7}, [r3], r1              ; p3
-    vld1.u8     {d8}, [r0], r1              ; p2
-    vld1.u8     {d9}, [r3], r1              ; p2
-    vld1.u8     {d10}, [r0], r1             ; p1
-    vld1.u8     {d11}, [r3], r1             ; p1
-    vld1.u8     {d12}, [r0], r1             ; p0
-    vld1.u8     {d13}, [r3], r1             ; p0
-    vld1.u8     {d14}, [r0], r1             ; q0
-    vld1.u8     {d15}, [r3], r1             ; q0
-    vld1.u8     {d16}, [r0], r1             ; q1
-    vld1.u8     {d17}, [r3], r1             ; q1
-    vld1.u8     {d18}, [r0], r1             ; q2
-    vld1.u8     {d19}, [r3], r1             ; q2
-    vld1.u8     {d20}, [r0], r1             ; q3
-    vld1.u8     {d21}, [r3], r1             ; q3
-
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _mbhlfuv_coeff_
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 +  limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-
-    sub         r0, r0, r1, lsl #3
-    sub         r3, r3, r1, lsl #3
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-
-    add         r0, r0, r1
-    add         r3, r3, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-
-    vst1.u8     {d8}, [r0], r1              ; store u op2
-    vst1.u8     {d9}, [r3], r1              ; store v op2
-    vst1.u8     {d10}, [r0], r1             ; store u op1
-    vst1.u8     {d11}, [r3], r1             ; store v op1
-    vst1.u8     {d12}, [r0], r1             ; store u op0
-    vst1.u8     {d13}, [r3], r1             ; store v op0
-    vst1.u8     {d14}, [r0], r1             ; store u oq0
-    vst1.u8     {d15}, [r3], r1             ; store v oq0
-    vst1.u8     {d16}, [r0], r1             ; store u oq1
-    vst1.u8     {d17}, [r3], r1             ; store v oq1
-    vst1.u8     {d18}, [r0], r1             ; store u oq2
-    vst1.u8     {d19}, [r3], r1             ; store v oq2
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
-
-;-----------------
-    AREA    mbhloopfilteruv_dat, DATA, READWRITE            ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbhlfuv_coeff_
-    DCD     mbhlfuv_coeff
-mbhlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm

deleted file mode 100644 (file)

index b0755b0..0000000
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ /dev/null
@@ -1,201 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move src pointer down by 4 lines
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {q3}, [r0], r1              ; p3
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {q4}, [r0], r1              ; p2
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {q5}, [r0], r1              ; p1
-    ldr         r12, _mbhlfy_coeff_
-    vld1.u8     {q6}, [r0], r1              ; p0
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vld1.u8     {q7}, [r0], r1              ; q0
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vld1.u8     {q8}, [r0], r1              ; q1
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vld1.u8     {q9}, [r0], r1              ; q2
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vld1.u8     {q10}, [r0], r1             ; q3
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    sub         r0, r0, r1, lsl #3
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-    add         r0, r0, r1
-    add         r2, r0, r1
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-    add         r3, r2, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-
-    vst1.u8     {q4}, [r0]                  ; store op2
-    vst1.u8     {q5}, [r2]                  ; store op1
-    vst1.u8     {q6}, [r3], r1              ; store op0
-    add         r12, r3, r1
-    vst1.u8     {q7}, [r3]                  ; store oq0
-    vst1.u8     {q8}, [r12], r1             ; store oq1
-    vst1.u8     {q9}, [r12]             ; store oq2
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
-
-;-----------------
-    AREA    mbhloopfiltery_dat, DATA, READWRITE         ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbhlfy_coeff_
-    DCD     mbhlfy_coeff
-mbhlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm

deleted file mode 100644 (file)

index 044b3a3..0000000
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ /dev/null
@@ -1,261 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_vertical_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    ldr         r3, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r3, r3, #4                  ; move v pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ;load u data
-    vld1.u8     {d7}, [r3], r1              ;load v data
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r3], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r3], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r3], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r3], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r3], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r3], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r3], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    sub         sp, sp, #32
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    mov         r12, sp
-    vst1.u8     {q3}, [r12]!
-    vst1.u8     {q10}, [r12]!
-    ldr         r12, _mbvlfuv_coeff_
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (flimit*2 + limit >= abs(p0 - q0)*2 + abs(p1 - q1)/2)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    ;
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-
-    sub         r0, r0, r1, lsl #3
-    sub         r3, r3, r1, lsl #3
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op1 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    vld1.u8     {q3}, [sp]!
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    vld1.u8     {q10}, [sp]!
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0], r1
-    vst1.8      {d7}, [r3], r1
-    vst1.8      {d8}, [r0], r1
-    vst1.8      {d9}, [r3], r1
-    vst1.8      {d10}, [r0], r1
-    vst1.8      {d11}, [r3], r1
-    vst1.8      {d12}, [r0], r1
-    vst1.8      {d13}, [r3], r1
-    vst1.8      {d14}, [r0], r1
-    vst1.8      {d15}, [r3], r1
-    vst1.8      {d16}, [r0], r1
-    vst1.8      {d17}, [r3], r1
-    vst1.8      {d18}, [r0], r1
-    vst1.8      {d19}, [r3], r1
-    vst1.8      {d20}, [r0], r1
-    vst1.8      {d21}, [r3], r1
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
-
-;-----------------
-    AREA    mbvloopfilteruv_dat, DATA, READWRITE            ;read/write by default
-;Data section named mbvloopfilteruv_dat. The DCD directives below reserve 22 words
-;of filter constants (one word per value). _mbvlfuv_coeff_ holds the address of the
-;table; the data is at mbvlfuv_coeff, mbvlfuv_coeff+4, mbvlfuv_coeff+8 ...
-_mbvlfuv_coeff_
-    DCD     mbvlfuv_coeff
-mbvlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm

deleted file mode 100644 (file)

index e071662..0000000
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ /dev/null
@@ -1,267 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_vertical_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-|vp8_mbloop_filter_vertical_edge_y_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
-    ldr         r12, [sp, #0]               ; load thresh pointer
-    vld1.u8     {d8}, [r0], r1
-    sub         sp, sp, #32
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d20}, [r0], r1
-
-    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
-    vld1.u8     {d9}, [r0], r1
-    vld1.u8     {d11}, [r0], r1
-    vld1.u8     {d13}, [r0], r1
-    vld1.u8     {d15}, [r0], r1
-    vld1.u8     {d17}, [r0], r1
-    vld1.u8     {d19}, [r0], r1
-    vld1.u8     {d21}, [r0], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    mov         r12, sp
-    vst1.u8     {q3}, [r12]!
-    vst1.u8     {q10}, [r12]!
-    ldr         r12, _mbvlfy_coeff_
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (flimit*2 + limit >= abs(p0 - q0)*2 + abs(p1 - q1)/2)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    ;
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-    sub         r0, r0, r1, lsl #4
-
-    add         r2, r0, r1
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-    add         r3, r2, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op1 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    vld1.u8     {q3}, [sp]!
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    vld1.u8     {q10}, [sp]!
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-    add         r12, r3, r1
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0]
-    vst1.8      {d8}, [r2]
-    vst1.8      {d10}, [r3]
-    vst1.8      {d12}, [r12], r1
-    add         r0, r12, r1
-    vst1.8      {d14}, [r12]
-    vst1.8      {d16}, [r0], r1
-    add         r2, r0, r1
-    vst1.8      {d18}, [r0]
-    vst1.8      {d20}, [r2], r1
-    add         r3, r2, r1
-    vst1.8      {d7}, [r2]
-    vst1.8      {d9}, [r3], r1
-    add         r12, r3, r1
-    vst1.8      {d11}, [r3]
-    vst1.8      {d13}, [r12], r1
-    add         r0, r12, r1
-    vst1.8      {d15}, [r12]
-    vst1.8      {d17}, [r0], r1
-    add         r2, r0, r1
-    vst1.8      {d19}, [r0]
-    vst1.8      {d21}, [r2]
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
-
-;-----------------
-    AREA    mbvloopfiltery_dat, DATA, READWRITE         ;read/write by default
-;Data section named mbvloopfiltery_dat. The DCD directives below reserve 22 words
-;of filter constants (one word per value). _mbvlfy_coeff_ holds the address of the
-;table; the data is at mbvlfy_coeff, mbvlfy_coeff+4, mbvlfy_coeff+8 ...
-_mbvlfy_coeff_
-    DCD     mbvlfy_coeff
-mbvlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h

index 18855a3c0365fd83bed2190d5270ed8b57c1c30b..c30f6dc2dcf9a69612d8e0993ee8839477a0843d 100644 (file)
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -21,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6);
  extern prototype_copy_block(vp8_copy_mem8x4_v6);
  extern prototype_copy_block(vp8_copy_mem16x16_v6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_recon_recon
  #define vp8_recon_recon vp8_recon_b_armv6
  
@@ -39,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6);
  #undef  vp8_recon_copy16x16
  #define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_recon_block(vp8_recon_b_neon);
@@ -49,6 +51,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
  extern prototype_copy_block(vp8_copy_mem8x4_neon);
  extern prototype_copy_block(vp8_copy_mem16x16_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_recon_recon
  #define vp8_recon_recon vp8_recon_b_neon
  
@@ -67,5 +70,6 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
  #undef  vp8_recon_copy16x16
  #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h

index 53600e547968efee0d7fce071ee72d5606012ace..6288538d06fa19b208d54170009157e5a2682f64 100644 (file)
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -22,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
  extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
  extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_subpix_sixtap16x16
  #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
  
@@ -46,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
  #undef  vp8_subpix_bilinear4x4
  #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
@@ -57,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
  extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
  extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_subpix_sixtap16x16
  #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
  
@@ -81,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
  #undef  vp8_subpix_bilinear4x4
  #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c

deleted file mode 100644 (file)

index 1eed97e..0000000
--- a/vp8/common/arm/systemdependent.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
-
-void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
-
-void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
-
-void vp8_machine_specific_config(VP8_COMMON *ctx)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-#if HAVE_ARMV7
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
-    rtcd->recon.recon       = vp8_recon_b_neon;
-    rtcd->recon.recon2      = vp8_recon2b_neon;
-    rtcd->recon.recon4      = vp8_recon4b_neon;
-#elif HAVE_ARMV6
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_armv6;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_armv6;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
-    rtcd->recon.recon       = vp8_recon_b_armv6;
-    rtcd->recon.recon2      = vp8_recon2b_armv6;
-    rtcd->recon.recon4      = vp8_recon4b_armv6;
-#else
-//pure c
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
-    rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_c;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_c;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_c;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_c;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_c;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_c;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_c;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_c;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
-#endif
-
-#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
-    rtcd->postproc.down        = vp8_mbpost_proc_down_c;
-    rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
-    rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
-    rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon;
-#elif HAVE_ARMV6
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-#else
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-
-#endif
-
-}
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h

index 75dd4f79d2af409a13c48b3196bf734af4c7ab48..a81bc8b95fb22500df57a487451dff3db683d9ad 100644 (file)
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -169,7 +169,7 @@ typedef struct
          MV  as_mv;
      } mv;
  
-    char partitioning;
+    unsigned char partitioning;
      unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
      unsigned char dc_diff;
      unsigned char need_to_clamp_mvs;
@@ -195,7 +195,7 @@ typedef struct
      short *diff;
      short *reference;
  
-    short(*dequant)[4];
+    short *dequant;
  
      // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
      unsigned char **base_pre;
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c

index c04e31ffe3ca87a5b3f1f4cdaf693a044b5c975d..0ef375e334fa2b72a4f0a3638ab593d44bd2b3b7 100644 (file)
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -18,6 +18,7 @@
  #include "onyxc_int.h"
  
  extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
+extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
  
  void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
  extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@@ -77,4 +78,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
      vp8_arch_x86_common_init(ctx);
  #endif
  
+#if ARCH_ARM
+    vp8_arch_arm_common_init(ctx);
+#endif
+
  }
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h

index 132765d18542db51a882d668fcc7938c8b674167..d12143d4d911870171b524386ba7dd5c7937fd16 100644 (file)
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
      vp8_subpix_rtcd_vtable_t      subpix;
      vp8_loopfilter_rtcd_vtable_t  loopfilter;
      vp8_postproc_rtcd_vtable_t    postproc;
+    int                           flags;
  #else
      int unused;
  #endif
@@ -83,9 +84,9 @@ typedef struct VP8Common
  {
      struct vpx_internal_error_info  error;
  
-    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
  
      int Width;
      int Height;
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c

index 0c8cf13bfbc5cab96d402ae33d9e3ad6e2de0ea9..df18c7c75fbe5bb8de366b8e94c9d0ffb47814a1 100644 (file)
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -76,7 +76,7 @@ const short vp8_rv[] =
  
  
  extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
-
+extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
  /***********************************************************************************************************
   */
  void vp8_post_proc_down_and_across_c
@@ -450,6 +450,45 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
  #define RTCD_VTABLE(oci) NULL
  #endif
  
+/* Clamp the end point (*x1,*y1) of the line from (x0,y0) to [0,width] x [0,height], recomputing the
+ * other coordinate from the slope. Each division is guarded by its own divisor (no divide by zero). */
+static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
+{
+    int dx = *x1 - x0;
+    int dy = *y1 - y0;
+
+    if (*x1 > width)
+    {
+        *x1 = width;
+        if (dx)
+            *y1 = ((width-x0)*dy)/dx + y0;
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+    }
+    if (*x1 < 0)
+    {
+        *x1 = 0;
+        if (dx)
+            *y1 = ((0-x0)*dy)/dx + y0;
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+    }
+    if (*y1 > height)
+    {
+        *y1 = height;
+        if (dy)
+            *x1 = ((height-y0)*dx)/dy + x0;
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+    }
+    if (*y1 < 0)
+    {
+        *y1 = 0;
+        if (dy)
+            *x1 = ((0-y0)*dx)/dy + x0;
+    }
+}
+
  int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
  {
      char message[512];
@@ -622,8 +661,37 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
  #endif
  
      }
+    else if (flags & VP8D_DEBUG_LEVEL5)
+    {
+        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+        int width  = post->y_width;
+        int height = post->y_height;
+        int mb_cols = width  >> 4;
+        unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
+        int y_stride = oci->post_proc_buffer.y_stride;
+        MODE_INFO *mi = oci->mi;
+        int x0, y0;
+
+        for (y0 = 8; y0 < (height + 8); y0 += 16)
+        {
+            for (x0 = 8; x0 < (width + 8); x0 += 16)
+            {
+               int x1, y1;
+               if (mi->mbmi.mode >= NEARESTMV)
+                {
+                    MV *mv = &mi->mbmi.mv.as_mv;
  
+                    x1 = x0 + (mv->col >> 3);
+                    y1 = y0 + (mv->row >> 3);
  
+                    constrain_line (x0, &x1, y0, &y1, width, height);
+                    vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
+                }
+                mi++;
+            }
+            mi++;
+        }
+    }
  
      *dest = oci->post_proc_buffer;
  
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h

index b1f925c4481181c7437ac0b127f5e178cfbcbc5c..a1e2330bbc781bfb7c70992664ddb04a9eddefa1 100644 (file)
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -21,6 +21,7 @@ enum
      VP8D_DEBUG_LEVEL2   = 16,
      VP8D_DEBUG_LEVEL3   = 32,
      VP8D_DEBUG_LEVEL4   = 64,
+    VP8D_DEBUG_LEVEL5   = 128,
  };
  
  #endif
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c

index ffdc660c2e90ac67c547b51d6e70b9066d45531e..2a7f1290808d447c3f442ccffd0ff419f3438487 100644 (file)
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -669,11 +669,15 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
  
                  if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
                  {
-                    x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
+                    x->subpixel_predict8x4(ptr, d0->pre_stride,
+                        d0->bmi.mv.as_mv.col & 7,
+                        d0->bmi.mv.as_mv.row & 7,
+                        dst_ptr, x->dst.uv_stride);
                  }
                  else
                  {
-                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
+                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr,
+                        d0->pre_stride, dst_ptr, x->dst.uv_stride);
                  }
              }
              else
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c

index da40f93525781bd8b7709753631913bc61d27049..b7922d38531aa94ffbb3c6e31dc7ff5f662d957f 100644 (file)
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -8,7 +8,7 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
-
+#include <stdlib.h>
  
  
  void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
@@ -51,3 +51,80 @@ void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
          colpos++;
      }
  }
+
+static void plot (const int x, const int y, unsigned char *image, const int pitch)
+{
+    image[y * pitch + x] ^= 0xFF;   /* invert the sample at column x of row y */
+}
+
+/* Bresenham line algorithm.
+ *
+ * Visits every pixel of the segment from (x0,y0) to (x1,y1), end points
+ * included, and hands each one to plot().
+ *
+ * Mind the argument order: both x coordinates come first, followed by
+ * both y coordinates.
+ *
+ *   x0, y0  one end of the segment
+ *   x1, y1  the other end of the segment
+ *   image   address of the pixel at (0,0)
+ *   pitch   number of bytes from one row of image to the next
+ *
+ * Nothing is clipped here, so both end points must lie inside the buffer.
+ */
+void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch)
+{
+    // A steep segment moves further in y than in x.  It is traced with the
+    // two axes exchanged so that the loop always steps along the long axis.
+    const int steep = abs(y1 - y0) > abs(x1 - x0);
+    int long_len, short_len;
+    int err, short_step;
+    int along, across;
+    int tmp;
+
+    if (steep)
+    {
+        tmp = x0;
+        x0 = y0;
+        y0 = tmp;
+
+        tmp = x1;
+        x1 = y1;
+        y1 = tmp;
+    }
+
+    // Start from whichever end has the smaller long-axis coordinate.
+    if (x0 > x1)
+    {
+        tmp = x0;
+        x0 = x1;
+        x1 = tmp;
+
+        tmp = y0;
+        y0 = y1;
+        y1 = tmp;
+    }
+
+    long_len   = x1 - x0;
+    short_len  = abs(y1 - y0);
+    err        = long_len / 2;
+    short_step = (y0 < y1) ? 1 : -1;
+    across     = y0;
+
+    for (along = x0; along <= x1; along++)
+    {
+        // Undo the axis exchange when addressing the pixel.
+        if (steep)
+            plot(across, along, image, pitch);
+        else
+            plot(along, across, image, pitch);
+
+        // Step sideways once the accumulated error drops below zero.
+        err -= short_len;
+        if (err < 0)
+        {
+            across += short_step;
+            err += long_len;
+        }
+    }
+}
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c

new file mode 100644 (file)

index 0000000..77cff47
--- /dev/null
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+/* Fill in pbi->dequant from the CPU flags in pbi->common.rtcd.flags; the body is empty unless CONFIG_RUNTIME_CPU_DETECT is non-zero. */
+void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = pbi->common.rtcd.flags;
+    int has_edsp = flags & HAS_EDSP;    /* NOTE(review): never read in this function */
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        pbi->dequant.block               = vp8_dequantize_b_v6;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_v6;
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_v6;
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif
+    }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+    if (has_neon)    /* placed after the ARMv6 block, so these entries overwrite the v6 ones when both are compiled in and both flags are set (dc_idct_add is left untouched) */
+    {
+        pbi->dequant.block               = vp8_dequantize_b_neon;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_neon;
+        /*This is not used: NEON always dequants two blocks at once.
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_neon;*/
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif
+    }
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT */
+}
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c

index 39265879b69eed4e40079a6473d8ca71f9d3d306..b3e14b7935c2c94e57d1144b2da04e48438f48be 100644 (file)
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -30,7 +30,7 @@ void vp8_dequantize_b_neon(BLOCKD *d)
      int i;
      short *DQ  = d->dqcoeff;
      short *Q   = d->qcoeff;
-    short *DQC = &d->dequant[0][0];
+    short *DQC = d->dequant;
  
      vp8_dequantize_b_loop_neon(Q, DQC, DQ);
  }
@@ -42,7 +42,7 @@ void vp8_dequantize_b_v6(BLOCKD *d)
      int i;
      short *DQ  = d->dqcoeff;
      short *Q   = d->qcoeff;
-    short *DQC = &d->dequant[0][0];
+    short *DQC = d->dequant;
  
      vp8_dequantize_b_loop_v6(Q, DQC, DQ);
  }
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h

index 40151e01ab9a1676df4f51c5257baf965c561bf6..b7d800d2603c639b39071e1f9fa367a0cde49f0c 100644 (file)
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -20,6 +20,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6)
  extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
  extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_dequant_block
  #define vp8_dequant_block vp8_dequantize_b_v6
  
@@ -38,6 +39,7 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
  #undef vp8_dequant_idct_add_uv_block
  #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_dequant_block(vp8_dequantize_b_neon);
@@ -47,6 +49,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neo
  extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
  extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_dequant_block
  #define vp8_dequant_block vp8_dequantize_b_neon
  
@@ -65,5 +68,6 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
  #undef vp8_dequant_idct_add_uv_block
  #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
  #endif
+#endif
  
  #endif
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c

deleted file mode 100644 (file)

index 9dcf7b6..0000000
--- a/vp8/decoder/arm/dsystemdependent.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dboolhuff.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
-
-void vp8_dmachine_specific_config(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    pbi->mb.rtcd         = &pbi->common.rtcd;
-#if HAVE_ARMV7
-    pbi->dequant.block   = vp8_dequantize_b_neon;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-
-#elif HAVE_ARMV6
-    pbi->dequant.block   = vp8_dequantize_b_v6;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
-#endif
-}
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c

index efe0ad8e2d209a917cdbc8ff9401e9db7f1ed3c5..273bdb694c2eb6102e4fcc8f6fe88378568870f2 100644 (file)
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -40,27 +40,24 @@
  
  void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
  {
-    int r, c;
      int i;
      int Q;
      VP8_COMMON *const pc = & pbi->common;
  
      for (Q = 0; Q < QINDEX_RANGE; Q++)
      {
-        pc->Y1dequant[Q][0][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
-        pc->Y2dequant[Q][0][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
-        pc->UVdequant[Q][0][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
+        pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
+        pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
+        pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
  
          // all the ac values = ;
          for (i = 1; i < 16; i++)
          {
              int rc = vp8_default_zig_zag1d[i];
-            r = (rc >> 2);
-            c = (rc & 3);
  
-            pc->Y1dequant[Q][r][c] = (short)vp8_ac_yquant(Q);
-            pc->Y2dequant[Q][r][c] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
-            pc->UVdequant[Q][r][c] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
+            pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);
+            pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
+            pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
          }
      }
  }
@@ -253,7 +250,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
          }
  
          DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                           xd->predictor, xd->dst.y_buffer,
                           xd->dst.y_stride, xd->eobs, xd->block[24].diff);
      }
@@ -268,13 +265,13 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
              if (xd->eobs[i] > 1)
              {
                  DEQUANT_INVOKE(&pbi->dequant, idct_add)
-                    (b->qcoeff, &b->dequant[0][0],  b->predictor,
+                    (b->qcoeff, b->dequant,  b->predictor,
                      *(b->base_dst) + b->dst, 16, b->dst_stride);
              }
              else
              {
                  IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-                    (b->qcoeff[0] * b->dequant[0][0], b->predictor,
+                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                      *(b->base_dst) + b->dst, 16, b->dst_stride);
                  ((int *)b->qcoeff)[0] = 0;
              }
@@ -284,13 +281,13 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
      else
      {
          DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                           xd->predictor, xd->dst.y_buffer,
                           xd->dst.y_stride, xd->eobs);
      }
  
      DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
-                    (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
+                    (xd->qcoeff+16*16, xd->block[16].dequant,
                       xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                       xd->dst.uv_stride, xd->eobs+16);
  }
@@ -462,7 +459,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
              partition_size = user_data_end - partition;
          }
  
-        if (partition + partition_size > user_data_end)
+        if (user_data_end - partition < partition_size)
              vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                                 "Truncated packet or corrupt partition "
                                 "%d length", i + 1);
@@ -564,12 +561,15 @@ int vp8_decode_frame(VP8D_COMP *pbi)
      MACROBLOCKD *const xd  = & pbi->mb;
      const unsigned char *data = (const unsigned char *)pbi->Source;
      const unsigned char *const data_end = data + pbi->source_sz;
-    int first_partition_length_in_bytes;
+    unsigned int first_partition_length_in_bytes;
  
      int mb_row;
      int i, j, k, l;
      const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
  
+    if (data_end - data < 3)
+        vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                           "Truncated packet");
      pc->frame_type = (FRAME_TYPE)(data[0] & 1);
      pc->version = (data[0] >> 1) & 7;
      pc->show_frame = (data[0] >> 4) & 1;
@@ -577,7 +577,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
          (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
      data += 3;
  
-    if (data + first_partition_length_in_bytes > data_end)
+    if (data_end - data < first_partition_length_in_bytes)
          vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                             "Truncated packet or corrupt partition 0 length");
      vp8_setup_version(pc);
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c

index 8cfa2a32e786bcdf39c2961278365e02126a5c05..f5d576ac7492e8d660a0132ae8a5c4238ef7e130 100644 (file)
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -24,7 +24,7 @@ void vp8_dequantize_b_c(BLOCKD *d)
      int i;
      short *DQ  = d->dqcoeff;
      short *Q   = d->qcoeff;
-    short *DQC = &d->dequant[0][0];
+    short *DQC = d->dequant;
  
      for (i = 0; i < 16; i++)
      {
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c

index 60f2af5b823773608ea4da9466dbab60d5465f59..84de7af435ddf69a08299077d6696207e0704a8d 100644 (file)
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -14,6 +14,7 @@
  #include "onyxd_int.h"
  
  extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
  
  void vp8_dmachine_specific_config(VP8D_COMP *pbi)
  {
@@ -37,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
  #if ARCH_X86 || ARCH_X86_64
      vp8_arch_x86_decode_init(pbi);
  #endif
+
+#if ARCH_ARM
+    vp8_arch_arm_decode_init(pbi);
+#endif
  }
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c

index 7d716b0a069d7566e4fa5fe01c28088128a13fb9..b5a6e3e858bb918fccf4dd49e9d598e21c35b94a 100644 (file)
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -30,6 +30,9 @@
  #include "systemdependent.h"
  #include "vpx_ports/vpx_timer.h"
  #include "detokenize.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
  
  extern void vp8_init_loop_filter(VP8_COMMON *cm);
  extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
@@ -224,7 +227,6 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
  #if HAVE_ARMV7
  extern void vp8_push_neon(INT64 *store);
  extern void vp8_pop_neon(INT64 *store);
-static INT64 dx_store_reg[8];
  #endif
  
  static int get_free_fb (VP8_COMMON *cm)
@@ -312,6 +314,9 @@ static int swap_frame_buffers (VP8_COMMON *cm)
  
  int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
  {
+#if HAVE_ARMV7
+    INT64 dx_store_reg[8];
+#endif
      VP8D_COMP *pbi = (VP8D_COMP *) ptr;
      VP8_COMMON *cm = &pbi->common;
      int retcode = 0;
@@ -327,35 +332,57 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
  
      pbi->common.error.error_code = VPX_CODEC_OK;
  
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(dx_store_reg);
+    }
+#endif
+
+    cm->new_fb_idx = get_free_fb (cm);
+
      if (setjmp(pbi->common.error.jmp))
      {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
+#endif
          pbi->common.error.setjmp = 0;
+        if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+          cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
          return -1;
      }
  
      pbi->common.error.setjmp = 1;
  
-#if HAVE_ARMV7
-    vp8_push_neon(dx_store_reg);
-#endif
-
      vpx_usec_timer_start(&timer);
  
      //cm->current_video_frame++;
      pbi->Source = source;
      pbi->source_sz = size;
  
-    cm->new_fb_idx = get_free_fb (cm);
-
      retcode = vp8_decode_frame(pbi);
  
      if (retcode < 0)
      {
  #if HAVE_ARMV7
-        vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
  #endif
          pbi->common.error.error_code = VPX_CODEC_ERROR;
          pbi->common.error.setjmp = 0;
+        if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+          cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
          return retcode;
      }
  
@@ -363,6 +390,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
      {
          if (swap_frame_buffers (cm))
          {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
              pbi->common.error.error_code = VPX_CODEC_ERROR;
              pbi->common.error.setjmp = 0;
              return -1;
@@ -371,6 +406,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
      {
          if (swap_frame_buffers (cm))
          {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
              pbi->common.error.error_code = VPX_CODEC_ERROR;
              pbi->common.error.setjmp = 0;
              return -1;
@@ -451,7 +494,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
  #endif
  
  #if HAVE_ARMV7
-    vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(dx_store_reg);
+    }
  #endif
      pbi->common.error.setjmp = 0;
      return retcode;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c

index 56dd5ef8e259043876483c7d678c7e2833fdea8d..2d7f7b9b818973e959f6f0bd031b95e30c1d92b5 100644 (file)
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -184,7 +184,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
          }
  
          DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                           xd->predictor, xd->dst.y_buffer,
                           xd->dst.y_stride, xd->eobs, xd->block[24].diff);
      }
@@ -198,13 +198,13 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
              if (xd->eobs[i] > 1)
              {
                  DEQUANT_INVOKE(&pbi->dequant, idct_add)
-                    (b->qcoeff, &b->dequant[0][0],  b->predictor,
+                    (b->qcoeff, b->dequant,  b->predictor,
                      *(b->base_dst) + b->dst, 16, b->dst_stride);
              }
              else
              {
                  IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-                    (b->qcoeff[0] * b->dequant[0][0], b->predictor,
+                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                      *(b->base_dst) + b->dst, 16, b->dst_stride);
                  ((int *)b->qcoeff)[0] = 0;
              }
@@ -213,13 +213,13 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
      else
      {
          DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                           xd->predictor, xd->dst.y_buffer,
                           xd->dst.y_stride, xd->eobs);
      }
  
      DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
-                    (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
+                    (xd->qcoeff+16*16, xd->block[16].dequant,
                       xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                       xd->dst.uv_stride, xd->eobs+16);
  #else
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c

new file mode 100644 (file)

index 0000000..8736fcf
--- /dev/null
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "variance.h"
+#include "onyx_int.h"
+
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+/* Fill in cpi->rtcd and vp8_yv12_copy_partial_frame_ptr from the CPU flags in cpi->common.rtcd.flags; the body is empty unless CONFIG_RUNTIME_CPU_DETECT is non-zero. */
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = cpi->common.rtcd.flags;
+    int has_edsp = flags & HAS_EDSP;    /* NOTE(review): never read in this function */
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        /*cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
+        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
+        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
+        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
+        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;*/
+
+        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
+        cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
+        cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
+        cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
+        cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;*/
+
+        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
+        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
+        cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
+        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;*/
+
+        /*cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
+        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        /*cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
+        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
+        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
+        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/
+
+        /*cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;*/
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;    /* the only entry this block actually assigns; everything else here is commented out */
+
+        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;*/
+
+        /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;*/
+    }
+#endif /* HAVE_ARMV6 */
+
+#if HAVE_ARMV7
+    if (has_neon)    /* placed after the ARMv6 block, so walsh_short4x4 ends up with the NEON routine when both apply */
+    {
+        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
+        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
+        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
+        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
+        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
+
+        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;*/
+        cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
+        cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
+        cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
+        cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
+
+        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
+        /*cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
+
+        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
+        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
+        /*cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
+        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;*/
+        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
+
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
+
+        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;*/
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
+
+        /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;*/
+        /* The neon quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
+    }
+#endif /* HAVE_ARMV7 */
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT /* NOTE(review): always true here, because this sits inside the outer CONFIG_RUNTIME_CPU_DETECT block; confirm whether the assignment below was meant to run in builds without runtime detection too */
+    if (has_neon)
+#endif
+    {
+        vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+    }
+#endif /* HAVE_ARMV7 */
+#endif /* CONFIG_RUNTIME_CPU_DETECT (opened at the top of the function) */
+}
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm

similarity index 95%

rename from vp8/encoder/arm/neon/boolhuff_armv7.asm

rename to vp8/encoder/arm/armv5te/boolhuff_armv5te.asm

index 9c4823c51fc2ce3869016f1bf87a076e3e877393..e78dc33229fe872ad9891f65de2dd8e0c9a0e1b0 100644 (file)
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -205,17 +205,10 @@ token_count_lt_zero_se
      ldr     r5, [r0, #vp8_writer_range]
      ldr     r3, [r0, #vp8_writer_count]
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r11, r1
      rsb     r4, r10, #32                 ; 32-n
  
      ; v is kept in r1 during the token pack loop
-    lsr     r1, r11, r4                 ; v >>= 32 - n
+    lsl     r1, r1, r4                  ; r1 = v << 32 - n
  
  encode_value_loop
      sub     r7, r5, #1                  ; range-1
@@ -223,7 +216,7 @@ encode_value_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is referred to as "bb"
-    lsrs    r1, r1, #1                  ; bit = v >> n
+    lsls    r1, r1, #1                  ; bit = MSB shifted off of v
      mov     r4, r7, lsl #7              ; ((range-1) * 128)
  
      mov     r7, #1
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm

similarity index 93%

rename from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm

rename to vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm

index c19ac8250649a5b9d4f2153f65294446daad6574..3233d2a96688d87018bab8e872c21058134940fb 100644 (file)
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -9,7 +9,7 @@
  ;
  
  
-    EXPORT |vp8cx_pack_tokens_armv7|
+    EXPORT |vp8cx_pack_tokens_armv5|
  
      INCLUDE vpx_vp8_enc_asm_offsets.asm
  
@@ -25,7 +25,7 @@
  ; r3 vp8_coef_encodings
  ; s0 vp8_extra_bits
  ; s1 vp8_coef_tree
-|vp8cx_pack_tokens_armv7| PROC
+|vp8cx_pack_tokens_armv5| PROC
      push    {r4-r11, lr}
  
      ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
@@ -57,18 +57,11 @@ while_p_lt_stop
      movne   lr, #2                      ; i = 2
      subne   r8, r8, #1                  ; --n
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
      rsb     r4, r8, #32                 ; 32-n
      ldr     r10, [sp, #52]              ; vp8_coef_tree
  
      ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
  
  ; loop start
  token_loop
@@ -78,7 +71,7 @@ token_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is referred to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = MSB shifted off of v
      mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
  
      ; bb can only be 0 or 1.  So only execute this statement
@@ -172,16 +165,15 @@ token_count_lt_zero
      ldr     r10, [r12, #vp8_extra_bit_struct_tree]
      str     r10, [sp, #4]               ; b->tree
  
-    rbit    r12, r7                     ; reverse v
      rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
  
      mov     lr, #0                      ; i = 0
  
  extra_bits_loop
      ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
      sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; bb = MSB shifted off of v
      mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
      addcs   lr, lr, #1                  ; i + bb
  
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm

similarity index 94%

rename from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm

rename to vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm

index 0756455869dc595c2d529d87dd5fb5ce74c705f9..a9b552ae175bfb7f30528c116fdf95f537aa6abe 100644 (file)
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -9,7 +9,7 @@
  ;
  
  
-    EXPORT |vp8cx_pack_mb_row_tokens_armv7|
+    EXPORT |vp8cx_pack_mb_row_tokens_armv5|
  
      INCLUDE vpx_vp8_enc_asm_offsets.asm
  
@@ -25,7 +25,7 @@
  ; r3 vp8_extra_bits
  ; s0 vp8_coef_tree
  
-|vp8cx_pack_mb_row_tokens_armv7| PROC
+|vp8cx_pack_mb_row_tokens_armv5| PROC
      push    {r4-r11, lr}
      sub     sp, sp, #24
  
@@ -78,18 +78,11 @@ while_p_lt_stop
      movne   lr, #2                      ; i = 2
      subne   r8, r8, #1                  ; --n
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
      rsb     r4, r8, #32                 ; 32-n
      ldr     r10, [sp, #60]              ; vp8_coef_tree
  
      ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                 ; r12 = v << 32 - n
  
  ; loop start
  token_loop
@@ -99,7 +92,7 @@ token_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is referred to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = MSB shifted off of v
      mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
  
      ; bb can only be 0 or 1.  So only execute this statement
@@ -193,16 +186,15 @@ token_count_lt_zero
      ldr     r10, [r12, #vp8_extra_bit_struct_tree]
      str     r10, [sp, #4]               ; b->tree
  
-    rbit    r12, r7                     ; reverse v
      rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
  
      mov     lr, #0                      ; i = 0
  
  extra_bits_loop
      ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
      sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; bb = MSB shifted off of v
      mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
      addcs   lr, lr, #1                  ; i + bb
  
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm

similarity index 95%

rename from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm

rename to vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm

index 10a3d985125572df07bd4fc750e388c2dcc29894..0835164e5e8755ed0b648eca1263a668e4f84e2f 100644 (file)
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -9,7 +9,7 @@
  ;
  
  
-    EXPORT |vp8cx_pack_tokens_into_partitions_armv7|
+    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
  
      INCLUDE vpx_vp8_enc_asm_offsets.asm
  
@@ -27,7 +27,7 @@
  ; s1 vp8_extra_bits,
  ; s2 const vp8_tree_index *,
  
-|vp8cx_pack_tokens_into_partitions_armv7| PROC
+|vp8cx_pack_tokens_into_partitions_armv5| PROC
      push    {r4-r11, lr}
      sub     sp, sp, #44
  
@@ -106,18 +106,11 @@ while_p_lt_stop
      movne   lr, #2                      ; i = 2
      subne   r8, r8, #1                  ; --n
  
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
      rsb     r4, r8, #32                 ; 32-n
      ldr     r10, [sp, #88]              ; vp8_coef_tree
  
      ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
  
  ; loop start
  token_loop
@@ -127,7 +120,7 @@ token_loop
      ; Decisions are made based on the bit value shifted
      ; off of v, so set a flag here based on this.
      ; This value is referred to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = MSB shifted off of v
      mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
  
      ; bb can only be 0 or 1.  So only execute this statement
@@ -221,16 +214,15 @@ token_count_lt_zero
      ldr     r10, [r12, #vp8_extra_bit_struct_tree]
      str     r10, [sp, #4]               ; b->tree
  
-    rbit    r12, r7                     ; reverse v
      rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
  
      mov     lr, #0                      ; i = 0
  
  extra_bits_loop
      ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
      sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; bb = MSB shifted off of v
      mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
      addcs   lr, lr, #1                  ; i + bb
  
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c

deleted file mode 100644 (file)

index 8d70d63..0000000
--- a/vp8/encoder/arm/csystemdependent.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "variance.h"
-#include "onyx_int.h"
-
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-
-void vp8_cmachine_specific_config(VP8_COMP *cpi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    cpi->rtcd.common                         = &cpi->common.rtcd;
-
-#if HAVE_ARMV7
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-    /* The neon quantizer has not been updated to match the new exact
-     * quantizer introduced in commit e04e2935
-     */
-    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
-#elif HAVE_ARMV6
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#else
-    //pure c
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
-#else
-    vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
-#endif
-}
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h

index 774599bf030c893f434817c8c008906f64ed1321..41fa5d1928df9bba55dc30c5858f5236baabcd64 100644 (file)
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -15,9 +15,11 @@
  #if HAVE_ARMV6
  extern prototype_fdct(vp8_short_walsh4x4_armv6);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_fdct_walsh_short4x4
  #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
  #endif
+#endif
  
  #if HAVE_ARMV7
  extern prototype_fdct(vp8_short_fdct4x4_neon);
@@ -26,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon);
  extern prototype_fdct(vp8_fast_fdct8x4_neon);
  extern prototype_fdct(vp8_short_walsh4x4_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_fdct_short4x4
  #define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
  
@@ -40,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon);
  
  #undef  vp8_fdct_walsh_short4x4
  #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
+#endif
  
  #endif
  
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h

index eb699433f4c79d3967dc75efaf7a5a201cac90a2..8fe453735594d97effe18b2c4d77d0dca8df3082 100644 (file)
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -30,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
  //#undef  vp8_encodemb_mbuverr
  //#define vp8_encodemb_mbuverr vp8_mbuverror_c
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_encodemb_subb
  #define vp8_encodemb_subb vp8_subtract_b_neon
  
@@ -38,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
  
  #undef  vp8_encodemb_submbuv
  #define vp8_encodemb_submbuv vp8_subtract_mbuv_neon
+#endif
  
  #endif
  
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c

index 4e95c47ac902bcdf5befff1921463055106fd29e..56358328eb774418bdec6d5440239db8bb8a303e 100644 (file)
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -786,8 +786,6 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
      return bestmse;
  }
  
-#if 1
-
  #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
  #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
  #define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
@@ -937,120 +935,6 @@ cal_neighbors:
  #undef ERR
  #undef CHECK_BETTER
  
-#else
-
-#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
-#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
-
-int vp8_hex_search
-(
-    MACROBLOCK *x,
-    BLOCK *b,
-    BLOCKD *d,
-    MV *ref_mv,
-    MV *best_mv,
-    int search_param,
-    int error_per_bit,
-    int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t      sf,
-    int *mvsadcost[2],
-    int *mvcost[2]
-)
-{
-    MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ;
-    MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
-    int i, j;
-    unsigned char *src = (*(b->base_src) + b->src);
-    int src_stride = b->src_stride;
-    //int rr= ref_mv->row,rc= ref_mv->col,br=rr,bc=rc,tr,tc;
-    int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
-    unsigned int besterr, thiserr = 0x7fffffff;
-
-    /*
-        if ( rc < x->mv_col_min) bc = x->mv_col_min;
-        if ( rc > x->mv_col_max) bc = x->mv_col_max;
-        if ( rr < x->mv_row_min) br = x->mv_row_min;
-        if ( rr > x->mv_row_max) br = x->mv_row_max;
-        rr>>=1;
-        rc>>=1;
-        br>>=3;
-        bc>>=3;
-    */
-    if (bc < x->mv_col_min) bc = x->mv_col_min;
-
-    if (bc > x->mv_col_max) bc = x->mv_col_max;
-
-    if (br < x->mv_row_min) br = x->mv_row_min;
-
-    if (br > x->mv_row_max) br = x->mv_row_max;
-
-    rr >>= 1;
-    rc >>= 1;
-
-    besterr = ERR(br, bc, thiserr);
-
-    // hex search  jbb changed to 127 to avoid max 256 problem steping by 2.
-    for (j = 0; j < 127; j++)
-    {
-        tr = br;
-        tc = bc;
-
-        for (i = 0; i < 6; i++)
-        {
-            int nr = tr + hex[i].row, nc = tc + hex[i].col;
-
-            if (nc < x->mv_col_min) continue;
-
-            if (nc > x->mv_col_max) continue;
-
-            if (nr < x->mv_row_min) continue;
-
-            if (nr > x->mv_row_max) continue;
-
-            CHECK_BETTER(thiserr, nr, nc);
-        }
-
-        if (tr == br && tc == bc)
-            break;
-    }
-
-    // check 8 1 away neighbors
-    tr = br;
-    tc = bc;
-
-    for (i = 0; i < 8; i++)
-    {
-        int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
-
-        if (nc < x->mv_col_min) continue;
-
-        if (nc > x->mv_col_max) continue;
-
-        if (nr < x->mv_row_min) continue;
-
-        if (nr > x->mv_row_max) continue;
-
-        CHECK_BETTER(thiserr, nr, nc);
-    }
-
-    best_mv->row = br;
-    best_mv->col = bc;
-
-    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
-}
-#undef MVC
-#undef PRE
-#undef SP
-#undef DIST
-#undef ERR
-#undef CHECK_BETTER
-
-#endif
-
  int vp8_diamond_search_sad
  (
      MACROBLOCK *x,
@@ -1166,173 +1050,6 @@ int vp8_diamond_search_sad
      + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  }
  
-int vp8_diamond_search_sadx4
-(
-    MACROBLOCK *x,
-    BLOCK *b,
-    BLOCKD *d,
-    MV *ref_mv,
-    MV *best_mv,
-    int search_param,
-    int error_per_bit,
-    int *num00,
-    vp8_variance_fn_ptr_t *fn_ptr,
-    int *mvsadcost[2],
-    int *mvcost[2]
-)
-{
-    int i, j, step;
-
-    unsigned char *what = (*(b->base_src) + b->src);
-    int what_stride = b->src_stride;
-    unsigned char *in_what;
-    int in_what_stride = d->pre_stride;
-    unsigned char *best_address;
-
-    int tot_steps;
-    MV this_mv;
-
-    int bestsad = INT_MAX;
-    int best_site = 0;
-    int last_site = 0;
-
-    int ref_row = ref_mv->row >> 3;
-    int ref_col = ref_mv->col >> 3;
-    int this_row_offset;
-    int this_col_offset;
-    search_site *ss;
-
-    unsigned char *check_here;
-    int thissad;
-
-    // Work out the start point for the search
-    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
-    best_address = in_what;
-
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
-    }
-
-    // search_param determines the length of the initial step and hence the number of iterations
-    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
-    ss = &x->ss[search_param * x->searches_per_step];
-    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
-
-    i = 1;
-    best_mv->row = ref_row;
-    best_mv->col = ref_col;
-
-    *num00 = 0;
-
-    for (step = 0; step < tot_steps ; step++)
-    {
-        int check_row_min, check_col_min, check_row_max, check_col_max;
-
-        check_row_min = x->mv_row_min - best_mv->row;
-        check_row_max = x->mv_row_max - best_mv->row;
-        check_col_min = x->mv_col_min - best_mv->col;
-        check_col_max = x->mv_col_max - best_mv->col;
-
-        for (j = 0 ; j < x->searches_per_step ; j += 4)
-        {
-            char *block_offset[4];
-            unsigned int valid_block[4];
-            int all_in = 1, t;
-
-            for (t = 0; t < 4; t++)
-            {
-                valid_block [t]  = (ss[t+i].mv.col > check_col_min);
-                valid_block [t] &= (ss[t+i].mv.col < check_col_max);
-                valid_block [t] &= (ss[t+i].mv.row > check_row_min);
-                valid_block [t] &= (ss[t+i].mv.row < check_row_max);
-
-                all_in &= valid_block[t];
-                block_offset[t] = ss[i+t].offset + best_address;
-            }
-
-            if (all_in)
-            {
-                int sad_array[4];
-
-                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
-
-                for (t = 0; t < 4; t++, i++)
-                {
-                    thissad = sad_array[t];
-
-                    if (thissad < bestsad)
-                    {
-                        this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
-                        this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
-                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                        if (thissad < bestsad)
-                        {
-                            bestsad = thissad;
-                            best_site = i;
-                        }
-                    }
-                }
-            }
-            else
-            {
-                int t;
-
-                for (t = 0; t < 4; i++, t++)
-                {
-                    // Trap illegal vectors
-                    if (valid_block[t])
-
-                    {
-                        check_here = block_offset[t];
-                        thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
-                        if (thissad < bestsad)
-                        {
-                            this_row_offset = best_mv->row + ss[i].mv.row;
-                            this_col_offset = best_mv->col + ss[i].mv.col;
-
-                            this_mv.row = this_row_offset << 3;
-                            this_mv.col = this_col_offset << 3;
-                            thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                            if (thissad < bestsad)
-                            {
-                                bestsad = thissad;
-                                best_site = i;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        if (best_site != last_site)
-        {
-            best_mv->row += ss[best_site].mv.row;
-            best_mv->col += ss[best_site].mv.col;
-            best_address += ss[best_site].offset;
-            last_site = best_site;
-        }
-        else if (best_address == in_what)
-            (*num00)++;
-    }
-
-    this_mv.row = best_mv->row << 3;
-    this_mv.col = best_mv->col << 3;
-
-    if (bestsad == INT_MAX)
-        return INT_MAX;
-
-    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
-    + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-}
-
-
  #if !(CONFIG_REALTIME_ONLY)
  int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
  {
@@ -1438,7 +1155,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
      int r, c;
  
      unsigned char *check_here;
-    int thissad;
+    unsigned int thissad;
  
      int ref_row = ref_mv->row >> 3;
      int ref_col = ref_mv->col >> 3;
@@ -1448,7 +1165,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
      int col_min = ref_col - distance;
      int col_max = ref_col + distance;
  
-    int sad_array[3];
+    unsigned int sad_array[3];
  
      // Work out the mid point for the search
      in_what = *(d->base_pre) + d->pre;
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c

index 50f58bf0882c02cac2f102e81288049126f9a65e..65c616614000eb29d0970c21d9a4376ffc8c5b2c 100644 (file)
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -29,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
  
  void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
  {
-    d->eob = vp8_fast_quantize_b_neon_func(b->coeff, &b->zbin[0][0], d->qcoeff, d->dqcoeff, d->dequant[0], vp8_rvsplus1_default_zig_zag1d, &b->round[0][0], &b->quant[0][0]);
+    d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
  }
  
  /*
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h

index 859e43f51524214bf88b4555c8860de8199c492d..fb9dd5a5b0402a94778b7d0a1084c6bec9119e01 100644 (file)
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -38,6 +38,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon);
  //extern prototype_variance2(vp8_get16x16var_c);
  extern prototype_sad(vp8_get4x4sse_cs_neon);
  
+#if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_variance_sad4x4
  #define vp8_variance_sad4x4 vp8_sad4x4_neon
  
@@ -100,6 +101,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
  
  #undef  vp8_variance_get4x4sse_cs
  #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
+#endif
  
  #endif
  
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h

index 559631338d7d33639c6b8cbffb58a83557a08f0f..f5d148ea477482d2833324ea191a0c14526b58bc 100644 (file)
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -12,25 +12,25 @@
  #ifndef __INC_BITSTREAM_H
  #define __INC_BITSTREAM_H
  
-#if HAVE_ARMV7
-void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount,
+#if HAVE_ARMV5TE
+void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
                               vp8_token *,
                               vp8_extra_bit_struct *,
                               const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *,
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
          vp8_token *,
          vp8_extra_bit_struct *,
          const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w,
+void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
                                      vp8_token *,
                                      vp8_extra_bit_struct *,
                                      const vp8_tree_index *);
  # define pack_tokens(a,b,c)                  \
-    vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
  # define pack_tokens_into_partitions(a,b,c,d)  \
-    vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
  # define pack_mb_row_tokens(a,b)               \
-    vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
  #else
  # define pack_tokens(a,b,c)                  pack_tokens_c(a,b,c)
  # define pack_tokens_into_partitions(a,b,c,d)  pack_tokens_into_partitions_c(a,b,c,d)
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h

index ffb88904e7e8d2e8b7d5572a5ac2b4248460c8f9..e94e549761cdad7e9d3b9d38f01913be4e44004c 100644 (file)
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -32,11 +32,11 @@ typedef struct
      short *coeff;
  
      // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
-    short(*quant)[4];
-    short(*quant_shift)[4];
-    short(*zbin)[4];
-    short(*zrun_zbin_boost);
-    short(*round)[4];
+    short *quant;
+    short *quant_shift;
+    short *zbin;
+    short *zrun_zbin_boost;
+    short *round;
  
      // Zbin Over Quant value
      short zbin_extra;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c

index 91384c73f7973840d3bf26f2a44733dbb26e222a..85e121be35b37a3556d29bde3a019f124effbbbe 100644 (file)
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -160,7 +160,6 @@ static void vp8cx_invert_quant(short *quant, short *shift, short d)
  
  void vp8cx_init_quantizer(VP8_COMP *cpi)
  {
-    int r, c;
      int i;
      int quant_val;
      int Q;
@@ -171,58 +170,56 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
      {
          // dc values
          quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
-                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
-        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.Y1dequant[Q][0][0] = quant_val;
+        vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
+                           cpi->Y1quant_shift[Q] + 0, quant_val);
+        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0] = quant_val;
          cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
-                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-        cpi->common.Y2dequant[Q][0][0] = quant_val;
+        vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
+                           cpi->Y2quant_shift[Q] + 0, quant_val);
+        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0] = quant_val;
          cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
-                           cpi->UVquant_shift[Q][0] + 0, quant_val);
-        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
-        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.UVdequant[Q][0][0] = quant_val;
+        vp8cx_invert_quant(cpi->UVquant[Q] + 0,
+                           cpi->UVquant_shift[Q] + 0, quant_val);
+        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0] = quant_val;
          cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          // all the ac values = ;
          for (i = 1; i < 16; i++)
          {
              int rc = vp8_default_zig_zag1d[i];
-            r = (rc >> 2);
-            c = (rc & 3);
  
              quant_val = vp8_ac_yquant(Q);
-            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
-                               cpi->Y1quant_shift[Q][r] + c, quant_val);
-            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.Y1dequant[Q][r][c] = quant_val;
+            vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
+                               cpi->Y1quant_shift[Q] + rc, quant_val);
+            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y1dequant[Q][rc] = quant_val;
              cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
-                               cpi->Y2quant_shift[Q][r] + c, quant_val);
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-            cpi->common.Y2dequant[Q][r][c] = quant_val;
+            vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
+                               cpi->Y2quant_shift[Q] + rc, quant_val);
+            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+            cpi->common.Y2dequant[Q][rc] = quant_val;
              cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
-                               cpi->UVquant_shift[Q][r] + c, quant_val);
-            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.UVdequant[Q][r][c] = quant_val;
+            vp8cx_invert_quant(cpi->UVquant[Q] + rc,
+                               cpi->UVquant_shift[Q] + rc, quant_val);
+            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.UVdequant[Q][rc] = quant_val;
              cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
          }
      }
@@ -230,7 +227,6 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
  #else
  void vp8cx_init_quantizer(VP8_COMP *cpi)
  {
-    int r, c;
      int i;
      int quant_val;
      int Q;
@@ -241,52 +237,50 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
      {
          // dc values
          quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.Y1dequant[Q][0][0] = quant_val;
+        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
+        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0] = quant_val;
          cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-        cpi->common.Y2dequant[Q][0][0] = quant_val;
+        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
+        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0] = quant_val;
          cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
-        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.UVdequant[Q][0][0] = quant_val;
+        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
+        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0] = quant_val;
          cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          // all the ac values = ;
          for (i = 1; i < 16; i++)
          {
              int rc = vp8_default_zig_zag1d[i];
-            r = (rc >> 2);
-            c = (rc & 3);
  
              quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.Y1dequant[Q][r][c] = quant_val;
+            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
+            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y1dequant[Q][rc] = quant_val;
              cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-            cpi->common.Y2dequant[Q][r][c] = quant_val;
+            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
+            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+            cpi->common.Y2dequant[Q][rc] = quant_val;
              cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.UVdequant[Q][r][c] = quant_val;
+            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
+            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.UVdequant[Q][rc] = quant_val;
              cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
          }
      }
@@ -317,7 +311,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
          QIndex = cpi->common.base_qindex;
  
      // Y
-    zbin_extra = (cpi->common.Y1dequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
  
      for (i = 0; i < 16; i++)
      {
@@ -331,7 +325,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
      }
  
      // UV
-    zbin_extra = (cpi->common.UVdequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
  
      for (i = 16; i < 24; i++)
      {
@@ -345,7 +339,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
      }
  
      // Y2
-    zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
      x->block[24].quant = cpi->Y2quant[QIndex];
      x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
      x->block[24].zbin = cpi->Y2zbin[QIndex];
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c

index 6611e0077ba591182175b7e841603dc170fffcb0..a790456f89a1ecdef13a23325e651ed8528115c2 100644 (file)
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
  
  #if !(CONFIG_REALTIME_ONLY)
  #if 1
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
          vp8_optimize_mby(x, rtcd);
  
  #endif
@@ -200,7 +200,7 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
  #if !(CONFIG_REALTIME_ONLY)
  #if 1
  
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
          vp8_optimize_mbuv(x, rtcd);
  
  #endif
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c

index 1f9568902b56814277ea193c8814f2eee1163d6d..99ee2b8067fc26cd3de1a0396c2087e11f85dda0 100644 (file)
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -301,8 +301,8 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
      vp8_strict_quantize_b(b, d);
  #endif
  
-    dequant_ptr = &d->dequant[0][0];
-    coeff_ptr = &b->coeff[0];
+    dequant_ptr = d->dequant;
+    coeff_ptr = b->coeff;
      qcoeff_ptr = d->qcoeff;
      dqcoeff_ptr = d->dqcoeff;
      i0 = !type;
@@ -633,7 +633,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
      vp8_quantize_mb(x);
  
  #if !(CONFIG_REALTIME_ONLY)
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
          vp8_optimize_mb(x, rtcd);
  #endif
  
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c

index 13633e9a4b3d84bddc3faa20aa6897de3465d1c3..607c3d23645b81e88aa2752e6e40172f68ae9512 100644 (file)
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -30,7 +30,6 @@
  #include "encodemv.h"
  
  //#define OUTPUT_FPF 1
-#define FIRSTPASS_MM 1
  
  #if CONFIG_RUNTIME_CPU_DETECT
  #define IF_RTCD(x) (x)
@@ -108,15 +107,6 @@ static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position)
  
  static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
  {
-    /*FIRSTPASS_STATS * start_pos;
-    int ret_val;
-
-    start_pos = cpi->stats_in;
-    ret_val = vp8_input_stats(cpi, next_frame);
-    reset_fpf_position(cpi, start_pos);
-
-    return ret_val;*/
-
      if (cpi->stats_in >= cpi->stats_in_end)
          return EOF;
  
@@ -127,7 +117,7 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
  // Calculate a modified Error used in distributing bits between easier and harder frames
  static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
  {
-    double av_err = cpi->total_stats.ssim_weighted_pred_err;
+    double av_err = cpi->total_stats->ssim_weighted_pred_err;
      double this_err = this_frame->ssim_weighted_pred_err;
      double modified_err;
  
@@ -238,7 +228,7 @@ int frame_max_bits(VP8_COMP *cpi)
      else
      {
          // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
-        max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+        max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
      }
  
      // Trap case where we are out of bits
@@ -248,13 +238,31 @@ int frame_max_bits(VP8_COMP *cpi)
      return max_bits;
  }
  
-void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
+
+extern size_t vp8_firstpass_stats_sz(unsigned int mb_count)
+{
+    /* Calculate the size of a stats packet, which is dependent on the frame
+     * resolution. The FIRSTPASS_STATS struct has a single element array,
+     * motion_map, which is virtually expanded to have one element per
+     * macroblock.
+     */
+    size_t stats_sz;
+    FIRSTPASS_STATS stats;
+
+    stats_sz = sizeof(FIRSTPASS_STATS) + mb_count;
+    stats_sz = (stats_sz + 7) & ~7;
+    return stats_sz;
+}
+
+
+void vp8_output_stats(const VP8_COMP            *cpi,
+                      struct vpx_codec_pkt_list *pktlist,
                        FIRSTPASS_STATS            *stats)
  {
      struct vpx_codec_cx_pkt pkt;
      pkt.kind = VPX_CODEC_STATS_PKT;
      pkt.data.twopass_stats.buf = stats;
-    pkt.data.twopass_stats.sz = sizeof(*stats);
+    pkt.data.twopass_stats.sz = vp8_firstpass_stats_sz(cpi->common.MBs);
      vpx_codec_pkt_list_add(pktlist, &pkt);
  
  // TEMP debug code
@@ -280,16 +288,24 @@ void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
                  stats->mv_in_out_count,
                  stats->count);
          fclose(fpfile);
+
+
+        fpfile = fopen("fpmotionmap.stt", "a");
+        fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile);
+        fclose(fpfile);
      }
  #endif
  }
  
  int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
  {
+    size_t stats_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
+
      if (cpi->stats_in >= cpi->stats_in_end)
          return EOF;
  
-    *fps = *cpi->stats_in++;
+    *fps = *cpi->stats_in;
+    cpi->stats_in = (void*)((char *)cpi->stats_in + stats_sz);
      return 1;
  }
  
@@ -352,59 +368,47 @@ void vp8_avg_stats(FIRSTPASS_STATS *section)
      section->duration   /= section->count;
  }
  
-int vp8_fpmm_get_pos(VP8_COMP *cpi)
+unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi)
  {
-    return ftell(cpi->fp_motion_mapfile);
+    return cpi->fp_motion_map_stats;
  }
-void vp8_fpmm_reset_pos(VP8_COMP *cpi, int target_pos)
+void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos)
  {
      int Offset;
  
-    if (cpi->fp_motion_mapfile)
-    {
-        Offset = ftell(cpi->fp_motion_mapfile) - target_pos;
-        fseek(cpi->fp_motion_mapfile, (int) - Offset, SEEK_CUR);
-    }
+    cpi->fp_motion_map_stats = target_pos;
  }
  
  void vp8_advance_fpmm(VP8_COMP *cpi, int count)
  {
-#if FIRSTPASS_MM
-    fseek(cpi->fp_motion_mapfile, (int)(count * cpi->common.MBs), SEEK_CUR);
-#endif
+    cpi->fp_motion_map_stats = (void*)((char*)cpi->fp_motion_map_stats +
+        count * vp8_firstpass_stats_sz(cpi->common.MBs));
  }
  
  void vp8_input_fpmm(VP8_COMP *cpi)
  {
-#if FIRSTPASS_MM
+    unsigned char *fpmm = cpi->fp_motion_map;
      int MBs = cpi->common.MBs;
      int max_frames = cpi->active_arnr_frames;
+    int i;
  
-    if (!cpi->fp_motion_mapfile)
-        return;                 // Error
-
-    // Read the specified number of frame motion maps
-    if (fread(cpi->fp_motion_map, 1,
-              max_frames * MBs,
-              cpi->fp_motion_mapfile) != max_frames*MBs)
+    for (i=0; i<max_frames; i++)
      {
-        // Read error
-        return;
+        char *motion_map = (char*)cpi->fp_motion_map_stats
+                           + sizeof(FIRSTPASS_STATS);
+
+        memcpy(fpmm, motion_map, MBs);
+        fpmm += MBs;
+        vp8_advance_fpmm(cpi, 1);
      }
  
      // Flag the use of weights in the temporal filter
      cpi->use_weighted_temporal_filter = 1;
-
-#endif
  }
  
  void vp8_init_first_pass(VP8_COMP *cpi)
  {
-    vp8_zero_stats(&cpi->total_stats);
-
-#ifdef FIRSTPASS_MM
-    cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "wb");
-#endif
+    vp8_zero_stats(cpi->total_stats);
  
  // TEMP debug code
  #ifdef OUTPUT_FPF
@@ -412,6 +416,8 @@ void vp8_init_first_pass(VP8_COMP *cpi)
          FILE *fpfile;
          fpfile = fopen("firstpass.stt", "w");
          fclose(fpfile);
+        fpfile = fopen("fpmotionmap.stt", "wb");
+        fclose(fpfile);
      }
  #endif
  
@@ -419,16 +425,10 @@ void vp8_init_first_pass(VP8_COMP *cpi)
  
  void vp8_end_first_pass(VP8_COMP *cpi)
  {
-    vp8_output_stats(cpi->output_pkt_list, &cpi->total_stats);
-
-#if FIRSTPASS_MM
-
-    if (cpi->fp_motion_mapfile)
-        fclose(cpi->fp_motion_mapfile);
+    vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
+}
  
-#endif
  
-}
  void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
  {
      MACROBLOCKD * const xd = & x->e_mbd;
@@ -839,19 +839,20 @@ void vp8_first_pass(VP8_COMP *cpi)
          fps.duration = cpi->source_end_time_stamp - cpi->source_time_stamp;
  
          // don't want to do outputstats with a stack variable!
-        cpi->this_frame_stats = fps;
-        vp8_output_stats(cpi->output_pkt_list, &cpi->this_frame_stats);
-        vp8_accumulate_stats(&cpi->total_stats, &fps);
-
-#if FIRSTPASS_MM
-        fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile);
-#endif
+        memcpy(cpi->this_frame_stats,
+               &fps,
+               sizeof(FIRSTPASS_STATS));
+        memcpy((char*)cpi->this_frame_stats + sizeof(FIRSTPASS_STATS),
+               cpi->fp_motion_map,
+               sizeof(cpi->fp_motion_map[0]) * cpi->common.MBs);
+        vp8_output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats);
+        vp8_accumulate_stats(cpi->total_stats, &fps);
      }
  
      // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met
      if ((cm->current_video_frame > 0) &&
-        (cpi->this_frame_stats.pcnt_inter > 0.20) &&
-        ((cpi->this_frame_stats.intra_error / cpi->this_frame_stats.coded_error) > 2.0))
+        (cpi->this_frame_stats->pcnt_inter > 0.20) &&
+        ((cpi->this_frame_stats->intra_error / cpi->this_frame_stats->coded_error) > 2.0))
      {
          vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
      }
@@ -1120,33 +1121,33 @@ void vp8_init_second_pass(VP8_COMP *cpi)
  
      double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
  
-    vp8_zero_stats(&cpi->total_stats);
+    vp8_zero_stats(cpi->total_stats);
  
      if (!cpi->stats_in_end)
          return;
  
-    cpi->total_stats = *cpi->stats_in_end;
+    *cpi->total_stats = *cpi->stats_in_end;
  
-    cpi->total_error_left = cpi->total_stats.ssim_weighted_pred_err;
-    cpi->total_intra_error_left = cpi->total_stats.intra_error;
-    cpi->total_coded_error_left = cpi->total_stats.coded_error;
+    cpi->total_error_left = cpi->total_stats->ssim_weighted_pred_err;
+    cpi->total_intra_error_left = cpi->total_stats->intra_error;
+    cpi->total_coded_error_left = cpi->total_stats->coded_error;
      cpi->start_tot_err_left = cpi->total_error_left;
  
-    //cpi->bits_left = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
-    //cpi->bits_left -= (long long)(cpi->total_stats.count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+    //cpi->bits_left = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+    //cpi->bits_left -= (long long)(cpi->total_stats->count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
  
      // each frame can have a different duration, as the frame rate in the source
      // isn't guaranteed to be constant.   The frame rate prior to the first frame
      // encoded in the second pass is a guess.  However the sum duration is not.
      // Its calculated based on the actual durations of all frames from the first
      // pass.
-    vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats.count / cpi->total_stats.duration);
+    vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats->count / cpi->total_stats->duration);
  
      cpi->output_frame_rate = cpi->oxcf.frame_rate;
-    cpi->bits_left = (long long)(cpi->total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
-    cpi->bits_left -= (long long)(cpi->total_stats.duration * two_pass_min_rate / 10000000.0);
+    cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
+    cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0);
  
-    vp8_avg_stats(&cpi->total_stats);
+    vp8_avg_stats(cpi->total_stats);
  
      // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence
      {
@@ -1162,7 +1163,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
              sum_iiratio += IIRatio;
          }
  
-        cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats.count);
+        cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats->count);
  
          // Reset file position
          reset_fpf_position(cpi, start_pos);
@@ -1184,21 +1185,11 @@ void vp8_init_second_pass(VP8_COMP *cpi)
  
      }
  
-#if FIRSTPASS_MM
-    cpi->fp_motion_mapfile = 0;
-    cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "rb");
-#endif
-
+    cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in;
  }
  
  void vp8_end_second_pass(VP8_COMP *cpi)
  {
-#if FIRSTPASS_MM
-
-    if (cpi->fp_motion_mapfile)
-        fclose(cpi->fp_motion_mapfile);
-
-#endif
  }
  
  // Analyse and define a gf/arf group .
@@ -1231,18 +1222,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
  
      int max_bits = frame_max_bits(cpi);    // Max for a single frame
  
-#if FIRSTPASS_MM
-    int fpmm_pos;
-#endif
+    unsigned char *fpmm_pos;
  
      cpi->gf_group_bits = 0;
      cpi->gf_decay_rate = 0;
  
      vp8_clear_system_state();  //__asm emms;
  
-#if FIRSTPASS_MM
      fpmm_pos = vp8_fpmm_get_pos(cpi);
-#endif
  
      start_pos = cpi->stats_in;
  
@@ -1494,7 +1481,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
              // Note: this_frame->frame has been updated in the loop
              // so it now points at the ARF frame.
              half_gf_int = cpi->baseline_gf_interval >> 1;
-            frames_after_arf = cpi->total_stats.count - this_frame->frame - 1;
+            frames_after_arf = cpi->total_stats->count - this_frame->frame - 1;
  
              switch (cpi->oxcf.arnr_type)
              {
@@ -1531,12 +1518,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
  
              cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
  
-#if FIRSTPASS_MM
              {
                  // Advance to & read in the motion map for those frames
                  // to be considered for filtering based on the position
                  // of the ARF
-                vp8_fpmm_reset_pos(cpi, cpi->fpmm_pos);
+                vp8_fpmm_reset_pos(cpi, cpi->fp_motion_map_stats_save);
  
                  // Position at the 'earliest' frame to be filtered
                  vp8_advance_fpmm(cpi,
@@ -1545,7 +1531,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
                  // Read / create a motion map for the region of interest
                  vp8_input_fpmm(cpi);
              }
-#endif
          }
          else
          {
@@ -1581,7 +1566,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
      // Now decide how many bits should be allocated to the GF group as  a proportion of those remaining in the kf group.
      // The final key frame group in the clip is treated as a special case where cpi->kf_group_bits is tied to cpi->bits_left.
      // This is also important for short clips where there may only be one key frame.
-    if (cpi->frames_to_key >= (int)(cpi->total_stats.count - cpi->common.current_video_frame))
+    if (cpi->frames_to_key >= (int)(cpi->total_stats->count - cpi->common.current_video_frame))
      {
          cpi->kf_group_bits = (cpi->bits_left > 0) ? cpi->bits_left : 0;
      }
@@ -1781,10 +1766,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
          reset_fpf_position(cpi, start_pos);
      }
  
-#if FIRSTPASS_MM
      // Reset the First pass motion map file position
      vp8_fpmm_reset_pos(cpi, fpmm_pos);
-#endif
  }
  
  // Allocate bits to a normal frame that is neither a gf an arf or a key frame.
@@ -1798,7 +1781,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
      int max_bits = frame_max_bits(cpi);    // Max for a single frame
  
      // The final few frames have special treatment
-    if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats.count - cpi->common.current_video_frame))
+    if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats->count - cpi->common.current_video_frame))
      {
          cpi->gf_group_bits = (cpi->bits_left > 0) ? cpi->bits_left : 0;;
      }
@@ -1843,7 +1826,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
  void vp8_second_pass(VP8_COMP *cpi)
  {
      int tmp_q;
-    int frames_left = (int)(cpi->total_stats.count - cpi->common.current_video_frame);
+    int frames_left = (int)(cpi->total_stats->count - cpi->common.current_video_frame);
  
      FIRSTPASS_STATS this_frame;
      FIRSTPASS_STATS this_frame_copy;
@@ -1866,14 +1849,12 @@ void vp8_second_pass(VP8_COMP *cpi)
      if (EOF == vp8_input_stats(cpi, &this_frame))
          return;
  
-#if FIRSTPASS_MM
      vpx_memset(cpi->fp_motion_map, 0,
                  cpi->oxcf.arnr_max_frames*cpi->common.MBs);
-    cpi->fpmm_pos = vp8_fpmm_get_pos(cpi);
+    cpi->fp_motion_map_stats_save = vp8_fpmm_get_pos(cpi);
  
      // Step over this frame's first pass motion map
      vp8_advance_fpmm(cpi, 1);
-#endif
  
      this_frame_error = this_frame.ssim_weighted_pred_err;
      this_frame_intra_error = this_frame.intra_error;
@@ -2562,7 +2543,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
          cpi->common.vert_scale = NORMAL;
  
          // Calculate Average bits per frame.
-        //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats.count - cpi->common.current_video_frame);
+        //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats->count - cpi->common.current_video_frame);
          av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate);
          //if ( av_bits_per_frame < 0.0 )
          //  av_bits_per_frame = 0.0
@@ -2625,7 +2606,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
          }
          else
          {
-            long long clip_bits = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+            long long clip_bits = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
              long long over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
              long long over_spend2 = cpi->oxcf.starting_buffer_level - projected_buffer_level;
  
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h

index c7f3e0e4510e090937dfed2fc40ae37c8844f65d..95e1e5463c6dcb44c460d4b4f6a175252817a825 100644 (file)
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -20,4 +20,5 @@ extern void vp8_init_second_pass(VP8_COMP *cpi);
  extern void vp8_second_pass(VP8_COMP *cpi);
  extern void vp8_end_second_pass(VP8_COMP *cpi);
  
+extern size_t vp8_firstpass_stats_sz(unsigned int mb_count);
  #endif
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c

index 1acb73d9cbd04ac085be89c98975ac14e4ae3c77..520b08f51cae258f9d49287dcdfeff3a0b1ecc0b 100644 (file)
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -15,6 +15,7 @@
  
  
  void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
  
  
  void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
@@ -94,4 +95,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
      vp8_arch_x86_encoder_init(cpi);
  #endif
  
+#if ARCH_ARM
+    vp8_arch_arm_encoder_init(cpi);
+#endif
+
  }
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c

index b89354eaaa0689a5f290b14113781b47c8d7aac7..4d60b92d67769667caffdb4424a5db2198616f29 100644 (file)
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1035,84 +1035,73 @@ int vp8_diamond_search_sadx4
  
      for (step = 0; step < tot_steps ; step++)
      {
-        int check_row_min, check_col_min, check_row_max, check_col_max;
+        int all_in = 1, t;
  
-        check_row_min = x->mv_row_min - best_mv->row;
-        check_row_max = x->mv_row_max - best_mv->row;
-        check_col_min = x->mv_col_min - best_mv->col;
-        check_col_max = x->mv_col_max - best_mv->col;
+        // To know whether all neighbor points are within bounds, 4 bounds checks are enough instead of
+        // checking all 4 bounds for each point.
+        all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min);
+        all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max);
+        all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min);
+        all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max);
  
-        for (j = 0 ; j < x->searches_per_step ; j += 4)
+        if (all_in)
          {
-            unsigned char *block_offset[4];
-            unsigned int valid_block[4];
-            int all_in = 1, t;
+            unsigned int sad_array[4];
  
-            for (t = 0; t < 4; t++)
+            for (j = 0 ; j < x->searches_per_step ; j += 4)
              {
-                valid_block [t]  = (ss[t+i].mv.col > check_col_min);
-                valid_block [t] &= (ss[t+i].mv.col < check_col_max);
-                valid_block [t] &= (ss[t+i].mv.row > check_row_min);
-                valid_block [t] &= (ss[t+i].mv.row < check_row_max);
+                unsigned char *block_offset[4];
  
-                all_in &= valid_block[t];
-                block_offset[t] = ss[i+t].offset + best_address;
-            }
-
-            if (all_in)
-            {
-                unsigned int sad_array[4];
+                for (t = 0; t < 4; t++)
+                    block_offset[t] = ss[i+t].offset + best_address;
  
                  fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
  
                  for (t = 0; t < 4; t++, i++)
                  {
-                    thissad = sad_array[t];
-
-                    if (thissad < bestsad)
+                    if (sad_array[t] < bestsad)
                      {
                          this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
                          this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
-                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                        sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
  
-                        if (thissad < bestsad)
+                        if (sad_array[t] < bestsad)
                          {
-                            bestsad = thissad;
+                            bestsad = sad_array[t];
                              best_site = i;
                          }
                      }
                  }
              }
-            else
+        }
+        else
+        {
+            for (j = 0 ; j < x->searches_per_step ; j++)
              {
-                int t;
+                // Trap illegal vectors
+                this_row_offset = best_mv->row + ss[i].mv.row;
+                this_col_offset = best_mv->col + ss[i].mv.col;
  
-                for (t = 0; t < 4; i++, t++)
+                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
                  {
-                    // Trap illegal vectors
-                    if (valid_block[t])
+                    check_here = ss[i].offset + best_address;
+                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
  
+                    if (thissad < bestsad)
                      {
-                        check_here = block_offset[t];
-                        thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+                        this_mv.row = this_row_offset << 3;
+                        this_mv.col = this_col_offset << 3;
+                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
  
                          if (thissad < bestsad)
                          {
-                            this_row_offset = best_mv->row + ss[i].mv.row;
-                            this_col_offset = best_mv->col + ss[i].mv.col;
-
-                            this_mv.row = this_row_offset << 3;
-                            this_mv.col = this_col_offset << 3;
-                            thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                            if (thissad < bestsad)
-                            {
-                                bestsad = thissad;
-                                best_site = i;
-                            }
+                            bestsad = thissad;
+                            best_site = i;
                          }
                      }
                  }
+                i++;
              }
          }
  
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c

index 54eb31be8abeae0a9011e1562a021730d4cc9d3b..bd41b2cb6a88da76b42c201cdda142946dabb988 100644 (file)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -30,6 +30,10 @@
  #include "threading.h"
  #include "vpx_ports/vpx_timer.h"
  #include "vpxerrors.h"
+#include "temporal_filter.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
  
  #include <math.h>
  #include <stdio.h>
@@ -43,9 +47,6 @@
  #define RTCD(x) NULL
  #endif
  
-#define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
-#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
-
  extern void vp8cx_init_mv_bits_sadcost();
  extern void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi);
  extern void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val);
@@ -330,6 +331,8 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
  
      cpi->mb.pip = 0;
  
+    vpx_free(cpi->total_stats);
+    vpx_free(cpi->this_frame_stats);
  }
  
  static void enable_segmentation(VP8_PTR ptr)
@@ -1282,7 +1285,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
      }
  
      if (cpi->sf.optimize_coefficients == 1)
-        cpi->mb.optimize = 1;
+        cpi->mb.optimize = 1 + cpi->is_next_src_alt_ref;
      else
          cpi->mb.optimize = 0;
  
@@ -1392,6 +1395,12 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
      CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols));
  
      cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
+
+    cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
+    cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
+    if(!cpi->total_stats || !cpi->this_frame_stats)
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate firstpass stats");
  }
  
  
@@ -1752,6 +1761,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
      // YX Temp
      cpi->last_alt_ref_sei    = -1;
      cpi->is_src_frame_alt_ref = 0;
+    cpi->is_next_src_alt_ref = 0;
  
  #if 0
      // Experimental RD Code
@@ -2037,6 +2047,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
      // YX Temp
      cpi->last_alt_ref_sei    = -1;
      cpi->is_src_frame_alt_ref = 0;
+    cpi->is_next_src_alt_ref = 0;
  
  #if 0
      // Experimental RD Code
@@ -2101,8 +2112,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
      CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
      CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
  
-    vp8_cmachine_specific_config(cpi);
      vp8_create_common(&cpi->common);
+    vp8_cmachine_specific_config(cpi);
  
      vp8_init_config((VP8_PTR)cpi, oxcf);
  
@@ -2291,10 +2302,12 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
      }
      else if (cpi->pass == 2)
      {
+        size_t packet_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
+        int packets = oxcf->two_pass_stats_in.sz / packet_sz;
+
          cpi->stats_in = oxcf->two_pass_stats_in.buf;
-        cpi->stats_in_end = cpi->stats_in
-                            + oxcf->two_pass_stats_in.sz / sizeof(FIRSTPASS_STATS)
-                            - 1;
+        cpi->stats_in_end = (void*)((char *)cpi->stats_in
+                            + (packets - 1) * packet_sz);
          vp8_init_second_pass(cpi);
      }
  
@@ -2519,6 +2532,7 @@ void vp8_remove_compressor(VP8_PTR *ptr)
              }
  
              fprintf(fmode, "};\n");
+            fclose(fmode);
          }
  #endif
  
@@ -2845,9 +2859,20 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
      {
          //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
  #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+        }
  #endif
  
          cpi->Source = &cpi->scaled_source;
@@ -3362,619 +3387,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
  }
  #endif
  // return of 0 means drop frame
-#define USE_FILTER_LUT 1
-#if VP8_TEMPORAL_ALT_REF
-
-#if USE_FILTER_LUT
-static int modifier_lut[7][19] =
-{
-    // Strength=0
-    {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=1
-    {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=2
-    {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=3
-    {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=4
-    {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=5
-    {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
-    // Strength=6
-    {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
-};
-#endif
-static void build_predictors_mb
-(
-    MACROBLOCKD *x,
-    unsigned char *y_mb_ptr,
-    unsigned char *u_mb_ptr,
-    unsigned char *v_mb_ptr,
-    int stride,
-    int mv_row,
-    int mv_col,
-    unsigned char *pred
-)
-{
-    int offset;
-    unsigned char *yptr, *uptr, *vptr;
-
-    // Y
-    yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
-
-    if ((mv_row | mv_col) & 7)
-    {
-//        vp8_sixtap_predict16x16_c(yptr, stride,
-//                                    mv_col & 7, mv_row & 7, &pred[0], 16);
-        x->subpixel_predict16x16(yptr, stride,
-                                    mv_col & 7, mv_row & 7, &pred[0], 16);
-    }
-    else
-    {
-        //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
-        RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
-    }
-
-    // U & V
-    mv_row >>= 1;
-    mv_col >>= 1;
-    stride >>= 1;
-    offset = (mv_row >> 3) * stride + (mv_col >> 3);
-    uptr = u_mb_ptr + offset;
-    vptr = v_mb_ptr + offset;
-
-    if ((mv_row | mv_col) & 7)
-    {
-        x->subpixel_predict8x8(uptr, stride,
-                            mv_col & 7, mv_row & 7, &pred[256], 8);
-        x->subpixel_predict8x8(vptr, stride,
-                            mv_col & 7, mv_row & 7, &pred[320], 8);
-    }
-    else
-    {
-        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
-        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
-    }
-}
-static void apply_temporal_filter
-(
-    unsigned char *frame1,
-    unsigned int stride,
-    unsigned char *frame2,
-    unsigned int block_size,
-    int strength,
-    int filter_weight,
-    int *accumulator,
-    int *count
-)
-{
-    int i, j, k;
-    int modifier;
-    int byte = 0;
-
-#if USE_FILTER_LUT
-    int *lut = modifier_lut[strength];
-#endif
-
-    for (i = 0,k = 0; i < block_size; i++)
-    {
-        for (j = 0; j < block_size; j++, k++)
-        {
-
-            int src_byte = frame1[byte];
-            int pixel_value = *frame2++;
-
-#if USE_FILTER_LUT
-            // LUT implementation --
-            // improves precision of filter
-            modifier = abs(src_byte-pixel_value);
-            modifier = modifier>18 ? 0 : lut[modifier];
-#else
-            modifier   = src_byte;
-            modifier  -= pixel_value;
-            modifier  *= modifier;
-            modifier >>= strength;
-            modifier  *= 3;
-
-            if (modifier > 16)
-                modifier = 16;
-
-            modifier = 16 - modifier;
-#endif
-            modifier *= filter_weight;
-            
-            count[k] += modifier;
-            accumulator[k] += modifier * pixel_value;
-
-            byte++;
-        }
-
-        byte += stride - block_size;
-    }
-}
-
-#if ALT_REF_MC_ENABLED
-static int dummy_cost[2*mv_max+1];
-
-static int find_matching_mb
-(
-    VP8_COMP *cpi,
-    YV12_BUFFER_CONFIG *arf_frame,
-    YV12_BUFFER_CONFIG *frame_ptr,
-    int mb_offset,
-    int error_thresh
-)
-{
-    MACROBLOCK *x = &cpi->mb;
-    int thissme;
-    int step_param;
-    int further_steps;
-    int n = 0;
-    int sadpb = x->sadperbit16;
-    int bestsme = INT_MAX;
-    int num00 = 0;
-
-    BLOCK *b = &x->block[0];
-    BLOCKD *d = &x->e_mbd.block[0];
-    MV best_ref_mv1 = {0,0};
-
-    int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
-    int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
-
-    // Save input state
-    unsigned char **base_src = b->base_src;
-    int src = b->src;
-    int src_stride = b->src_stride;
-    unsigned char **base_pre = d->base_pre;
-    int pre = d->pre;
-    int pre_stride = d->pre_stride;
-
-    // Setup frame pointers
-    b->base_src = &arf_frame->y_buffer;
-    b->src_stride = arf_frame->y_stride;
-    b->src = mb_offset;
-
-    d->base_pre = &frame_ptr->y_buffer;
-    d->pre_stride = frame_ptr->y_stride;
-    d->pre = mb_offset;
-
-    // Further step/diamond searches as necessary
-    if (cpi->Speed < 8)
-    {
-        step_param = cpi->sf.first_step +
-                    ((cpi->Speed > 5) ? 1 : 0);
-        further_steps =
-            (cpi->sf.max_step_search_steps - 1)-step_param;
-    }
-    else
-    {
-        step_param = cpi->sf.first_step + 2;
-        further_steps = 0;
-    }
-
-    if (1/*cpi->sf.search_method == HEX*/)
-    {
-        // TODO Check that the 16x16 vf & sdf are selected here
-        bestsme = vp8_hex_search(x, b, d,
-            &best_ref_mv1, &d->bmi.mv.as_mv,
-            step_param,
-            sadpb/*x->errorperbit*/,
-            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
-            mvsadcost, mvcost);
-    }
-    else
-    {
-        int mv_x, mv_y;
-
-        bestsme = cpi->diamond_search_sad(x, b, d,
-            &best_ref_mv1, &d->bmi.mv.as_mv,
-            step_param,
-            sadpb / 2/*x->errorperbit*/,
-            &num00, &cpi->fn_ptr,
-            mvsadcost, mvcost); //sadpb < 9
-
-        // Further step/diamond searches as necessary
-        n = 0;
-        //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
-        n = num00;
-        num00 = 0;
-
-        while (n < further_steps)
-        {
-            n++;
-
-            if (num00)
-                num00--;
-            else
-            {
-                thissme = cpi->diamond_search_sad(x, b, d,
-                    &best_ref_mv1, &d->bmi.mv.as_mv,
-                    step_param + n,
-                    sadpb / 4/*x->errorperbit*/,
-                    &num00, &cpi->fn_ptr,
-                    mvsadcost, mvcost); //sadpb = 9
-
-                if (thissme < bestsme)
-                {
-                    bestsme = thissme;
-                    mv_y = d->bmi.mv.as_mv.row;
-                    mv_x = d->bmi.mv.as_mv.col;
-                }
-                else
-                {
-                    d->bmi.mv.as_mv.row = mv_y;
-                    d->bmi.mv.as_mv.col = mv_x;
-                }
-            }
-        }
-    }
-
-#if ALT_REF_SUBPEL_ENABLED
-    // Try sub-pixel MC?
-    //if (bestsme > error_thresh && bestsme < INT_MAX)
-    {
-        bestsme = cpi->find_fractional_mv_step(x, b, d,
-                    &d->bmi.mv.as_mv, &best_ref_mv1,
-                    x->errorperbit, cpi->fn_ptr.svf,
-                    cpi->fn_ptr.vf, cpi->mb.mvcost);
-    }
-#endif
-
-    // Save input state
-    b->base_src = base_src;
-    b->src = src;
-    b->src_stride = src_stride;
-    d->base_pre = base_pre;
-    d->pre = pre;
-    d->pre_stride = pre_stride;
-
-    return bestsme;
-}
-#endif
-
-static void vp8cx_temp_blur1_c
-(
-    VP8_COMP *cpi,
-    int frame_count,
-    int alt_ref_index,
-    int strength
-)
-{
-    int byte;
-    int frame;
-    int mb_col, mb_row;
-    unsigned int filter_weight[MAX_LAG_BUFFERS];
-    unsigned char *mm_ptr = cpi->fp_motion_map;
-    int cols = cpi->common.mb_cols;
-    int rows = cpi->common.mb_rows;
-    int MBs  = cpi->common.MBs;
-    int mb_y_offset = 0;
-    int mb_uv_offset = 0;
-    unsigned int accumulator[384];
-    unsigned int count[384];
-    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
-    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
-    unsigned char *dst1, *dst2;
-    DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-    
-    // Save input state
-    unsigned char *y_buffer = mbd->pre.y_buffer;
-    unsigned char *u_buffer = mbd->pre.u_buffer;
-    unsigned char *v_buffer = mbd->pre.v_buffer;
-
-    if (!cpi->use_weighted_temporal_filter)
-    {
-        // Temporal filtering is unweighted
-        for (frame = 0; frame < frame_count; frame++)
-            filter_weight[frame] = 1;
-    }
-
-    for (mb_row = 0; mb_row < rows; mb_row++)
-    {
-#if ALT_REF_MC_ENABLED
-        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
-        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
-        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
-                                + (VP8BORDERINPIXELS - 19);
-#endif
-
-        for (mb_col = 0; mb_col < cols; mb_col++)
-        {
-            int i, j, k, w;
-            int weight_cap;
-            int stride;
-
-            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
-            vpx_memset(count, 0, 384*sizeof(unsigned int));
-
-#if ALT_REF_MC_ENABLED
-            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
-            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
-            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
-                                    + (VP8BORDERINPIXELS - 19);
-#endif
-
-            // Read & process macroblock weights from motion map
-            if (cpi->use_weighted_temporal_filter)
-            {
-                weight_cap = 2;
-
-                for (frame = alt_ref_index-1; frame >= 0; frame--)
-                {
-                    w = *(mm_ptr + (frame+1)*MBs);
-                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
-                    weight_cap = w;
-                }
-
-                filter_weight[alt_ref_index] = 2;
-
-                weight_cap = 2;
-
-                for (frame = alt_ref_index+1; frame < frame_count; frame++)
-                {
-                    w = *(mm_ptr + frame*MBs);
-                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
-                    weight_cap = w;
-                }
-
-            }
-
-            for (frame = 0; frame < frame_count; frame++)
-            {
-                int err;
-
-                if (cpi->frames[frame] == NULL)
-                    continue;
-
-                mbd->block[0].bmi.mv.as_mv.row = 0;
-                mbd->block[0].bmi.mv.as_mv.col = 0;
-
-#if ALT_REF_MC_ENABLED
-                //if (filter_weight[frame] == 0)
-                {
-#define THRESH_LOW   10000
-#define THRESH_HIGH  20000
-
-                    // Correlation has been lost try MC
-                    err = find_matching_mb ( cpi,
-                                             cpi->frames[alt_ref_index],
-                                             cpi->frames[frame],
-                                             mb_y_offset,
-                                             THRESH_LOW );
-
-                    if (filter_weight[frame] < 2)
-                    {
-                        // Set weight depending on error
-                        filter_weight[frame] = err<THRESH_LOW
-                                                ? 2 : err<THRESH_HIGH ? 1 : 0;
-                    }
-                }
-#endif
-                if (filter_weight[frame] != 0)
-                {
-                    // Construct the predictors
-                    build_predictors_mb (
-                              mbd,
-                              cpi->frames[frame]->y_buffer + mb_y_offset,
-                              cpi->frames[frame]->u_buffer + mb_uv_offset,
-                              cpi->frames[frame]->v_buffer + mb_uv_offset,
-                              cpi->frames[frame]->y_stride,
-                              mbd->block[0].bmi.mv.as_mv.row,
-                              mbd->block[0].bmi.mv.as_mv.col,
-                              predictor );
-
-                    // Apply the filter (YUV)
-                    apply_temporal_filter ( f->y_buffer + mb_y_offset,
-                                            f->y_stride,
-                                            predictor,
-                                            16,
-                                            strength,
-                                            filter_weight[frame],
-                                            accumulator,
-                                            count );
-
-                    apply_temporal_filter ( f->u_buffer + mb_uv_offset,
-                                            f->uv_stride,
-                                            predictor + 256,
-                                            8,
-                                            strength,
-                                            filter_weight[frame],
-                                            accumulator + 256,
-                                            count + 256 );
-
-                    apply_temporal_filter ( f->v_buffer + mb_uv_offset,
-                                            f->uv_stride,
-                                            predictor + 320,
-                                            8,
-                                            strength,
-                                            filter_weight[frame],
-                                            accumulator + 320,
-                                            count + 320 );
-                }
-            }
-
-            // Normalize filter output to produce AltRef frame
-            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
-            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
-            byte = mb_y_offset;
-            for (i = 0,k = 0; i < 16; i++)
-            {
-                for (j = 0; j < 16; j++, k++)
-                {
-                    unsigned int pval = accumulator[k] + (count[k] >> 1);
-                    pval *= cpi->fixed_divide[count[k]];
-                    pval >>= 19;
-
-                    dst1[byte] = (unsigned char)pval;
-
-                    // move to next pixel
-                    byte++;
-                }
-
-                byte += stride - 16;
-            }
-
-            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
-            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
-            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
-            byte = mb_uv_offset;
-            for (i = 0,k = 256; i < 8; i++)
-            {
-                for (j = 0; j < 8; j++, k++)
-                {
-                    int m=k+64;
-
-                    // U
-                    unsigned int pval = accumulator[k] + (count[k] >> 1);
-                    pval *= cpi->fixed_divide[count[k]];
-                    pval >>= 19;
-                    dst1[byte] = (unsigned char)pval;
-
-                    // V
-                    pval = accumulator[m] + (count[m] >> 1);
-                    pval *= cpi->fixed_divide[count[m]];
-                    pval >>= 19;
-                    dst2[byte] = (unsigned char)pval;
-
-                    // move to next pixel
-                    byte++;
-                }
-
-                byte += stride - 8;
-            }
-
-            mm_ptr++;
-            mb_y_offset += 16;
-            mb_uv_offset += 8;
-        }
-
-        mb_y_offset += 16*f->y_stride-f->y_width;
-        mb_uv_offset += 8*f->uv_stride-f->uv_width;
-    }
-
-    // Restore input state
-    mbd->pre.y_buffer = y_buffer;
-    mbd->pre.u_buffer = u_buffer;
-    mbd->pre.v_buffer = v_buffer;
-}
-
-static void vp8cx_temp_filter_c
-(
-    VP8_COMP *cpi
-)
-{
-    int frame = 0;
-
-    int num_frames_backward = 0;
-    int num_frames_forward = 0;
-    int frames_to_blur_backward = 0;
-    int frames_to_blur_forward = 0;
-    int frames_to_blur = 0;
-    int start_frame = 0;
-    unsigned int filtered = 0;
-
-    int strength = cpi->oxcf.arnr_strength;
-
-    int blur_type = cpi->oxcf.arnr_type;
-
-    int max_frames = cpi->active_arnr_frames;
-
-    num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
-
-    if (num_frames_backward < 0)
-        num_frames_backward += cpi->oxcf.lag_in_frames;
-
-    num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
-
-    switch (blur_type)
-    {
-    case 1:
-        /////////////////////////////////////////
-        // Backward Blur
-
-        frames_to_blur_backward = num_frames_backward;
-
-        if (frames_to_blur_backward >= max_frames)
-            frames_to_blur_backward = max_frames - 1;
-
-        frames_to_blur = frames_to_blur_backward + 1;
-        break;
-
-    case 2:
-        /////////////////////////////////////////
-        // Forward Blur
-
-        frames_to_blur_forward = num_frames_forward;
-
-        if (frames_to_blur_forward >= max_frames)
-            frames_to_blur_forward = max_frames - 1;
-
-        frames_to_blur = frames_to_blur_forward + 1;
-        break;
-
-    case 3:
-    default:
-        /////////////////////////////////////////
-        // Center Blur
-        frames_to_blur_forward = num_frames_forward;
-        frames_to_blur_backward = num_frames_backward;
-
-        if (frames_to_blur_forward > frames_to_blur_backward)
-            frames_to_blur_forward = frames_to_blur_backward;
-
-        if (frames_to_blur_backward > frames_to_blur_forward)
-            frames_to_blur_backward = frames_to_blur_forward;
-
-        // When max_frames is even we have 1 more frame backward than forward
-        if (frames_to_blur_forward > (max_frames - 1) / 2)
-            frames_to_blur_forward = ((max_frames - 1) / 2);
-
-        if (frames_to_blur_backward > (max_frames / 2))
-            frames_to_blur_backward = (max_frames / 2);
-
-        frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
-        break;
-    }
-
-    start_frame = (cpi->last_alt_ref_sei
-                    + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
-
-#ifdef DEBUGFWG
-    // DEBUG FWG
-    printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
-           , max_frames
-           , num_frames_backward
-           , num_frames_forward
-           , frames_to_blur
-           , frames_to_blur_backward
-           , frames_to_blur_forward
-           , cpi->source_encode_index
-           , cpi->last_alt_ref_sei
-           , start_frame);
-#endif
-
-    // Setup frame pointers, NULL indicates frame not included in filter
-    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
-    for (frame = 0; frame < frames_to_blur; frame++)
-    {
-        int which_buffer =  start_frame - frame;
-
-        if (which_buffer < 0)
-            which_buffer += cpi->oxcf.lag_in_frames;
-
-        cpi->frames[frames_to_blur-1-frame]
-                = &cpi->src_buffer[which_buffer].source_buffer;
-    }
-
-    vp8cx_temp_blur1_c (
-        cpi,
-        frames_to_blur,
-        frames_to_blur_backward,
-        strength );
-}
-#endif
-
  
  static void encode_frame_to_data_rate
  (
@@ -5230,10 +4642,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
  #if HAVE_ARMV7
  extern void vp8_push_neon(INT64 *store);
  extern void vp8_pop_neon(INT64 *store);
-static INT64 store_reg[8];
  #endif
  int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
  {
+    INT64 store_reg[8];
      VP8_COMP *cpi = (VP8_COMP *) ptr;
      VP8_COMMON *cm = &cpi->common;
      struct vpx_usec_timer  timer;
@@ -5242,7 +4654,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
          return -1;
  
  #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
  #endif
  
      vpx_usec_timer_start(&timer);
@@ -5251,7 +4668,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
      if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
      {
  #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
  #endif
          return -1;
      }
@@ -5292,9 +4714,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
          s->source_time_stamp = time_stamp;
          s->source_frame_flags = frame_flags;
  #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+        }
  #endif
          cpi->source_buffer_count = 1;
      }
@@ -5303,14 +4736,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
      cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
  
  #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
  #endif
  
      return 0;
  }
  int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
  {
-
+    INT64 store_reg[8];
      VP8_COMP *cpi = (VP8_COMP *) ptr;
      VP8_COMMON *cm = &cpi->common;
      struct vpx_usec_timer  tsctimer;
@@ -5321,7 +4759,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
          return -1;
  
  #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
  #endif
  
      vpx_usec_timer_start(&cmptimer);
@@ -5422,6 +4865,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
              cm->show_frame = 0;
              cpi->source_alt_ref_pending = FALSE;   // Clear Pending altf Ref flag.
              cpi->is_src_frame_alt_ref = 0;
+            cpi->is_next_src_alt_ref = 0;
          }
          else
  #endif
@@ -5440,6 +4884,11 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
                      cpi->is_src_frame_alt_ref = 0;
  
                  cpi->source_encode_index = (cpi->source_encode_index + 1) % cpi->oxcf.lag_in_frames;
+
+                if(cpi->source_encode_index == cpi->last_alt_ref_sei)
+                    cpi->is_next_src_alt_ref = 1;
+                else
+                    cpi->is_next_src_alt_ref = 0;
              }
  
  #endif
@@ -5467,7 +4916,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
  #endif
  
  #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
  #endif
          return -1;
      }
@@ -5510,7 +4964,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
      if (!cpi)
      {
  #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
  #endif
          return 0;
      }
@@ -5699,7 +5158,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
  #endif
  
  #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
  #endif
  
      return 0;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h

index cb768c01840d2e4bf18876adf20b5df75e121416..eb516e9277ca32ba4e80c1e0751af33cb3d1034f 100644 (file)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -232,20 +232,20 @@ typedef struct VP8_ENCODER_RTCD
  typedef struct
  {
  
-    DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
  
-    DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
  
-    DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
  
      DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
      DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
@@ -273,6 +273,7 @@ typedef struct
  
      int last_alt_ref_sei;
      int is_src_frame_alt_ref;
+    int is_next_src_alt_ref;
  
      int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
      int alt_is_last;  // Alt reference frame same as last ( short circuit altref search)
@@ -460,14 +461,14 @@ typedef struct
  
      int target_bandwidth;
      long long bits_left;
-    FIRSTPASS_STATS total_stats;
-    FIRSTPASS_STATS this_frame_stats;
+    FIRSTPASS_STATS *total_stats;
+    FIRSTPASS_STATS *this_frame_stats;
      FIRSTPASS_STATS *stats_in, *stats_in_end;
      struct vpx_codec_pkt_list  *output_pkt_list;
      int                          first_pass_done;
      unsigned char *fp_motion_map;
-    FILE *fp_motion_mapfile;
-    int fpmm_pos;
+
+    unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save;
  
  #if 0
      // Experimental code for lagged and one pass
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c

index 79e07dbc0e8e0a383cf1d0c5ec741bdc343d5006..09e8b5412b5895fd3050b2c0941f5047037578ec 100644 (file)
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -16,6 +16,9 @@
  #include "vpx_scale/yv12extend.h"
  #include "vpx_scale/vpxscale.h"
  #include "alloccommon.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
  
  extern void vp8_loop_filter_frame(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val);
  extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val, int sharpness_lvl);
@@ -306,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
      //  Make a copy of the unfiltered / processed recon buffer
  #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
-#else
-    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+    }
  #endif
  
      if (cm->frame_type == KEY_FRAME)
@@ -343,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
      //  Re-instate the unfiltered frame
  #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-    vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+    }
  #endif
  
      while (filter_step > 0)
@@ -372,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
              //  Re-instate the unfiltered frame
  #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
  #endif
  
              // If value is close to the best so far then bias towards a lower loop filter value.
@@ -401,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
  
              //  Re-instate the unfiltered frame
  #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
  #endif
  
              // Was it better than the previous best?
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c

index 6cc224494db848be96c65873c1f293c104b25a2f..5e65fadb3fef292553f2eed54ab5fa0753b6b302 100644 (file)
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -23,14 +23,14 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
      int i, rc, eob;
      int zbin;
      int x, y, z, sz;
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *quant_shift_ptr = &b->quant_shift[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *coeff_ptr       = b->coeff;
+    short *zbin_ptr        = b->zbin;
+    short *round_ptr       = b->round;
+    short *quant_ptr       = b->quant;
+    short *quant_shift_ptr = b->quant_shift;
+    short *qcoeff_ptr      = d->qcoeff;
+    short *dqcoeff_ptr     = d->dqcoeff;
+    short *dequant_ptr     = d->dequant;
  
      vpx_memset(qcoeff_ptr, 0, 32);
      vpx_memset(dqcoeff_ptr, 0, 32);
@@ -69,16 +69,16 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
      int i, rc, eob;
      int zbin;
      int x, y, z, sz;
-    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *quant_shift_ptr = &b->quant_shift[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-    short zbin_oq_value = b->zbin_extra;
+    short *zbin_boost_ptr  = b->zrun_zbin_boost;
+    short *coeff_ptr       = b->coeff;
+    short *zbin_ptr        = b->zbin;
+    short *round_ptr       = b->round;
+    short *quant_ptr       = b->quant;
+    short *quant_shift_ptr = b->quant_shift;
+    short *qcoeff_ptr      = d->qcoeff;
+    short *dqcoeff_ptr     = d->dqcoeff;
+    short *dequant_ptr     = d->dequant;
+    short zbin_oq_value    = b->zbin_extra;
  
      vpx_memset(qcoeff_ptr, 0, 32);
      vpx_memset(dqcoeff_ptr, 0, 32);
@@ -136,12 +136,12 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
      short *dqcoeff_ptr;
      short *dequant_ptr;
  
-    coeff_ptr = &b->coeff[0];
-    quant_ptr = &b->quant[0][0];
-    quant_shift_ptr = &b->quant_shift[0][0];
-    qcoeff_ptr = d->qcoeff;
-    dqcoeff_ptr = d->dqcoeff;
-    dequant_ptr = &d->dequant[0][0];
+    coeff_ptr       = b->coeff;
+    quant_ptr       = b->quant;
+    quant_shift_ptr = b->quant_shift;
+    qcoeff_ptr      = d->qcoeff;
+    dqcoeff_ptr     = d->dqcoeff;
+    dequant_ptr     = d->dequant;
      eob = - 1;
      vpx_memset(qcoeff_ptr, 0, 32);
      vpx_memset(dqcoeff_ptr, 0, 32);
@@ -183,12 +183,12 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
      int i, rc, eob;
      int zbin;
      int x, y, z, sz;
-    short *coeff_ptr  = &b->coeff[0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
+    short *coeff_ptr   = b->coeff;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant;
+    short *qcoeff_ptr  = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *dequant_ptr = d->dequant;
  
      eob = -1;
      for (i = 0; i < 16; i++)
@@ -217,15 +217,15 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
      int i, rc, eob;
      int zbin;
      int x, y, z, sz;
-    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-    short zbin_oq_value = b->zbin_extra;
+    short *zbin_boost_ptr = b->zrun_zbin_boost;
+    short *coeff_ptr      = b->coeff;
+    short *zbin_ptr       = b->zbin;
+    short *round_ptr      = b->round;
+    short *quant_ptr      = b->quant;
+    short *qcoeff_ptr     = d->qcoeff;
+    short *dqcoeff_ptr    = d->dqcoeff;
+    short *dequant_ptr    = d->dequant;
+    short zbin_oq_value   = b->zbin_extra;
  
      vpx_memset(qcoeff_ptr, 0, 32);
      vpx_memset(dqcoeff_ptr, 0, 32);
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c

new file mode 100644 (file)

index 0000000..630afdb
--- /dev/null
+++ b/vp8/encoder/temporal_filter.c
@@ -0,0 +1,651 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "onyxc_int.h"
+#include "onyx_int.h"
+#include "systemdependent.h"
+#include "quantize.h"
+#include "alloccommon.h"
+#include "mcomp.h"
+#include "firstpass.h"
+#include "psnr.h"
+#include "vpx_scale/vpxscale.h"
+#include "extend.h"
+#include "ratectrl.h"
+#include "quant_common.h"
+#include "segmentation.h"
+#include "g_common.h"
+#include "vpx_scale/yv12extend.h"
+#include "postproc.h"
+#include "vpx_mem/vpx_mem.h"
+#include "swapyv12buffer.h"
+#include "threading.h"
+#include "vpx_ports/vpx_timer.h"
+#include "vpxerrors.h"
+
+#include <math.h>
+#include <limits.h>
+
+#define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
+#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
+
+#define USE_FILTER_LUT 1
+#if VP8_TEMPORAL_ALT_REF
+
+#if USE_FILTER_LUT
+// Per-pixel filter weight table used by apply_temporal_filter():
+//     modifier_lut[strength][abs(source pixel - predictor pixel)]
+// There is one row per strength 0..6 and one column per absolute difference
+// 0..18; apply_temporal_filter() substitutes a weight of 0 for any difference
+// above 18 instead of indexing the table.
+static int modifier_lut[7][19] =
+{
+    // Strength=0
+    {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=1
+    {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=2
+    {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=3
+    {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=4
+    {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=5
+    {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
+    // Strength=6
+    {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
+};
+#endif
+// Builds the Y, U and V predictors for one macroblock into 'pred'.
+//
+// x         - macroblock descriptor supplying the sub-pixel predict functions
+//             and the RTCD copy functions
+// y_mb_ptr,
+// u_mb_ptr,
+// v_mb_ptr  - top-left of the macroblock in each plane of the reference frame
+// stride    - luma stride; the chroma planes are read with stride >> 1
+// mv_row,
+// mv_col    - luma motion vector; the low three bits are the fractional part
+//             and the remaining bits the whole-pixel displacement.  The
+//             vector is halved before being applied to the chroma planes.
+// pred      - output: 16x16 Y at pred[0] (pitch 16), 8x8 U at pred[256] and
+//             8x8 V at pred[320] (pitch 8 each)
+static void build_predictors_mb
+(
+    MACROBLOCKD *x,
+    unsigned char *y_mb_ptr,
+    unsigned char *u_mb_ptr,
+    unsigned char *v_mb_ptr,
+    int stride,
+    int mv_row,
+    int mv_col,
+    unsigned char *pred
+)
+{
+    // Chroma uses the halved vector and the halved stride
+    const int uv_row    = mv_row >> 1;
+    const int uv_col    = mv_col >> 1;
+    const int uv_stride = stride >> 1;
+    const int uv_offset = (uv_row >> 3) * uv_stride + (uv_col >> 3);
+
+    unsigned char *const y_src = y_mb_ptr + (mv_row >> 3) * stride
+                                          + (mv_col >> 3);
+    unsigned char *const u_src = u_mb_ptr + uv_offset;
+    unsigned char *const v_src = v_mb_ptr + uv_offset;
+
+    unsigned char *const y_pred = pred;
+    unsigned char *const u_pred = pred + 256;
+    unsigned char *const v_pred = pred + 320;
+
+    // Y: a plain copy is enough when the vector has no fractional part
+    if (((mv_row | mv_col) & 7) == 0)
+    {
+        RECON_INVOKE(&x->rtcd->recon, copy16x16)(y_src, stride, y_pred, 16);
+    }
+    else
+    {
+        x->subpixel_predict16x16(y_src, stride,
+                                 mv_col & 7, mv_row & 7, y_pred, 16);
+    }
+
+    // U & V
+    if (((uv_row | uv_col) & 7) == 0)
+    {
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(u_src, uv_stride, u_pred, 8);
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(v_src, uv_stride, v_pred, 8);
+    }
+    else
+    {
+        x->subpixel_predict8x8(u_src, uv_stride,
+                               uv_col & 7, uv_row & 7, u_pred, 8);
+        x->subpixel_predict8x8(v_src, uv_stride,
+                               uv_col & 7, uv_row & 7, v_pred, 8);
+    }
+}
+// Accumulates one predictor block into the running temporal-filter sums.
+//
+// frame1        - top-left of the block in the frame being filtered; its rows
+//                 are 'stride' bytes apart
+// stride        - row pitch of frame1
+// frame2        - predictor block, block_size * block_size bytes stored
+//                 contiguously
+// block_size    - width and height of the block in pixels
+// strength      - filter strength: selects the modifier_lut row when
+//                 USE_FILTER_LUT is set, otherwise the right shift applied to
+//                 the squared pixel difference
+// filter_weight - multiplier applied to every per-pixel weight
+// accumulator   - per pixel, += weight * predictor pixel
+// count         - per pixel, += weight
+static void apply_temporal_filter
+(
+    unsigned char *frame1,
+    unsigned int stride,
+    unsigned char *frame2,
+    unsigned int block_size,
+    int strength,
+    int filter_weight,
+    unsigned int *accumulator,
+    unsigned int *count
+)
+{
+    unsigned int row, col;
+    unsigned int px = 0;    // position within frame2 / accumulator / count
+
+#if USE_FILTER_LUT
+    const int *weight_by_diff = modifier_lut[strength];
+#endif
+
+    for (row = 0; row < block_size; row++)
+    {
+        const unsigned char *src_row = frame1 + row * stride;
+
+        for (col = 0; col < block_size; col++, px++)
+        {
+            const int src_val  = src_row[col];
+            const int pred_val = frame2[px];
+            int diff = src_val - pred_val;
+            int weight;
+
+#if USE_FILTER_LUT
+            // Table lookup on the absolute difference; anything beyond the
+            // last table column contributes nothing
+            diff = abs(diff);
+            weight = (diff > 18) ? 0 : weight_by_diff[diff];
+#else
+            // 16 - 3 * (diff^2 >> strength), floored at 0
+            weight = ((diff * diff) >> strength) * 3;
+
+            if (weight > 16)
+                weight = 16;
+
+            weight = 16 - weight;
+#endif
+            weight *= filter_weight;
+
+            count[px] += weight;
+            accumulator[px] += weight * pred_val;
+        }
+    }
+}
+
+#if ALT_REF_MC_ENABLED
+// Zero-initialised cost table; find_matching_mb() passes pointers into it as
+// the MV cost / MV SAD cost tables of the full-pixel searches.
+// NOTE(review): the table has 2*mv_max+1 entries but the pointers below are
+// centred on index mv_max+1, so only offsets -(mv_max+1)..(mv_max-1) are in
+// bounds -- confirm the search routines never index +mv_max.
+static int dummy_cost[2*mv_max+1];
+
+// Motion-searches one macroblock of 'frame_ptr' against the co-located
+// macroblock of 'arf_frame'.
+//
+// cpi          - encoder instance; cpi->mb block 0 is temporarily re-pointed
+//                at the two frames and restored before returning
+// arf_frame    - frame used as the search source
+// frame_ptr    - frame searched for the match
+// mb_offset    - offset of the macroblock inside both luma planes
+// error_thresh - currently unused: the condition that would have gated the
+//                sub-pixel step on it is disabled
+//
+// Returns the error reported by the last search step that ran.  The motion
+// vector found is left in cpi->mb.e_mbd.block[0].bmi.mv.as_mv.
+static int find_matching_mb
+(
+    VP8_COMP *cpi,
+    YV12_BUFFER_CONFIG *arf_frame,
+    YV12_BUFFER_CONFIG *frame_ptr,
+    int mb_offset,
+    int error_thresh
+)
+{
+    MACROBLOCK *x = &cpi->mb;
+    int thissme;
+    int step_param;
+    int further_steps;
+    int n = 0;
+    int sadpb = x->sadperbit16;
+    int bestsme = INT_MAX;
+    int num00 = 0;
+
+    BLOCK *b = &x->block[0];
+    BLOCKD *d = &x->e_mbd.block[0];
+    MV best_ref_mv1 = {0,0};
+
+    int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+    int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+
+    // Save input state
+    unsigned char **base_src = b->base_src;
+    int src = b->src;
+    int src_stride = b->src_stride;
+    unsigned char **base_pre = d->base_pre;
+    int pre = d->pre;
+    int pre_stride = d->pre_stride;
+
+    // Setup frame pointers
+    b->base_src = &arf_frame->y_buffer;
+    b->src_stride = arf_frame->y_stride;
+    b->src = mb_offset;
+
+    d->base_pre = &frame_ptr->y_buffer;
+    d->pre_stride = frame_ptr->y_stride;
+    d->pre = mb_offset;
+
+    // Choose the first search step and the number of refinement steps
+    if (cpi->Speed < 8)
+    {
+        step_param = cpi->sf.first_step +
+                    ((cpi->Speed > 5) ? 1 : 0);
+        further_steps =
+            (cpi->sf.max_step_search_steps - 1)-step_param;
+    }
+    else
+    {
+        step_param = cpi->sf.first_step + 2;
+        further_steps = 0;
+    }
+
+    // The hex search is hard-wired on; the diamond search below is kept as
+    // the alternative for when cpi->sf.search_method is honoured again.
+    if (1/*cpi->sf.search_method == HEX*/)
+    {
+        // TODO Check that the 16x16 vf & sdf are selected here
+        bestsme = vp8_hex_search(x, b, d,
+            &best_ref_mv1, &d->bmi.mv.as_mv,
+            step_param,
+            sadpb/*x->errorperbit*/,
+            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
+            mvsadcost, mvcost);
+    }
+    else
+    {
+        int mv_x, mv_y;
+
+        bestsme = cpi->diamond_search_sad(x, b, d,
+            &best_ref_mv1, &d->bmi.mv.as_mv,
+            step_param,
+            sadpb / 2/*x->errorperbit*/,
+            &num00, &cpi->fn_ptr,
+            mvsadcost, mvcost); //sadpb < 9
+
+        // Remember the initial result.  Without this the "no improvement"
+        // path below would write indeterminate values back into the block's
+        // motion vector when the first refinement fails to beat bestsme.
+        mv_y = d->bmi.mv.as_mv.row;
+        mv_x = d->bmi.mv.as_mv.col;
+
+        // Further step/diamond searches as necessary
+        n = num00;
+        num00 = 0;
+
+        while (n < further_steps)
+        {
+            n++;
+
+            if (num00)
+                num00--;
+            else
+            {
+                thissme = cpi->diamond_search_sad(x, b, d,
+                    &best_ref_mv1, &d->bmi.mv.as_mv,
+                    step_param + n,
+                    sadpb / 4/*x->errorperbit*/,
+                    &num00, &cpi->fn_ptr,
+                    mvsadcost, mvcost); //sadpb = 9
+
+                if (thissme < bestsme)
+                {
+                    // Better match: keep it
+                    bestsme = thissme;
+                    mv_y = d->bmi.mv.as_mv.row;
+                    mv_x = d->bmi.mv.as_mv.col;
+                }
+                else
+                {
+                    // No improvement: put the best vector so far back
+                    d->bmi.mv.as_mv.row = mv_y;
+                    d->bmi.mv.as_mv.col = mv_x;
+                }
+            }
+        }
+    }
+
+#if ALT_REF_SUBPEL_ENABLED
+    // Sub-pixel refinement, run unconditionally
+    //if (bestsme > error_thresh && bestsme < INT_MAX)
+    {
+        bestsme = cpi->find_fractional_mv_step(x, b, d,
+                    &d->bmi.mv.as_mv, &best_ref_mv1,
+                    x->errorperbit, cpi->fn_ptr.svf,
+                    cpi->fn_ptr.vf, cpi->mb.mvcost);
+    }
+#endif
+
+    // Restore input state
+    b->base_src = base_src;
+    b->src = src;
+    b->src_stride = src_stride;
+    d->base_pre = base_pre;
+    d->pre = pre;
+    d->pre_stride = pre_stride;
+
+    return bestsme;
+}
+#endif
+
+// Temporally filters the frames in cpi->frames[] around the alt-ref frame
+// and writes the result to cpi->alt_ref_buffer.source_buffer.
+//
+// cpi           - encoder instance
+// frame_count   - number of cpi->frames[] entries to consider; NULL entries
+//                 are skipped
+// alt_ref_index - index in cpi->frames[] of the frame being filtered
+// strength      - filter strength handed to apply_temporal_filter()
+//
+// For every macroblock, each contributing frame is (optionally) motion
+// searched against the alt-ref frame, its predictor is accumulated with a
+// per-frame weight, and the weighted sums are normalised into the output.
+static void vp8cx_temp_blur1_c
+(
+    VP8_COMP *cpi,
+    int frame_count,
+    int alt_ref_index,
+    int strength
+)
+{
+    int byte;
+    int frame;
+    int mb_col, mb_row;
+    unsigned int filter_weight[MAX_LAG_BUFFERS];
+    unsigned char *mm_ptr = cpi->fp_motion_map;
+    int cols = cpi->common.mb_cols;
+    int rows = cpi->common.mb_rows;
+    int MBs  = cpi->common.MBs;
+    int mb_y_offset = 0;
+    int mb_uv_offset = 0;
+    // Layout shared by accumulator, count and predictor:
+    // [0..255] Y (16x16), [256..319] U (8x8), [320..383] V (8x8)
+    unsigned int accumulator[384];
+    unsigned int count[384];
+    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
+    unsigned char *dst1, *dst2;
+    DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
+
+    // Save input state
+    unsigned char *y_buffer = mbd->pre.y_buffer;
+    unsigned char *u_buffer = mbd->pre.u_buffer;
+    unsigned char *v_buffer = mbd->pre.v_buffer;
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+#if ALT_REF_MC_ENABLED
+        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
+        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+                                + (VP8BORDERINPIXELS - 19);
+#endif
+
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+            int i, j, k, w;
+            int weight_cap;
+            int stride;
+
+            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
+            vpx_memset(count, 0, 384*sizeof(unsigned int));
+
+#if ALT_REF_MC_ENABLED
+            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
+            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
+                                    + (VP8BORDERINPIXELS - 19);
+#endif
+
+            if (cpi->use_weighted_temporal_filter)
+            {
+                // Read & process macroblock weights from motion map.
+                // Walking away from the alt-ref frame in each direction,
+                // a frame's weight is limited by the map value of the
+                // previous (nearer) frame, starting from a cap of 2.
+                weight_cap = 2;
+
+                for (frame = alt_ref_index-1; frame >= 0; frame--)
+                {
+                    w = *(mm_ptr + (frame+1)*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+                filter_weight[alt_ref_index] = 2;
+
+                weight_cap = 2;
+
+                for (frame = alt_ref_index+1; frame < frame_count; frame++)
+                {
+                    w = *(mm_ptr + frame*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+            }
+            else
+            {
+                // Temporal filtering is unweighted.  The weights are reset
+                // for every macroblock: the MC step below overwrites
+                // filter_weight[], and a weight of 2 left over from an
+                // earlier macroblock would otherwise never be re-evaluated
+                // against this macroblock's matching error.
+                for (frame = 0; frame < frame_count; frame++)
+                    filter_weight[frame] = 1;
+            }
+
+            for (frame = 0; frame < frame_count; frame++)
+            {
+                int err;
+
+                // NULL indicates frame not included in filter
+                if (cpi->frames[frame] == NULL)
+                    continue;
+
+                mbd->block[0].bmi.mv.as_mv.row = 0;
+                mbd->block[0].bmi.mv.as_mv.col = 0;
+
+#if ALT_REF_MC_ENABLED
+                //if (filter_weight[frame] == 0)
+                {
+#define THRESH_LOW   10000
+#define THRESH_HIGH  20000
+
+                    // Motion search this frame against the alt-ref frame;
+                    // the vector found is left in mbd->block[0].bmi.mv
+                    err = find_matching_mb ( cpi,
+                                             cpi->frames[alt_ref_index],
+                                             cpi->frames[frame],
+                                             mb_y_offset,
+                                             THRESH_LOW );
+
+                    if (filter_weight[frame] < 2)
+                    {
+                        // Set weight depending on error
+                        filter_weight[frame] = err<THRESH_LOW
+                                                ? 2 : err<THRESH_HIGH ? 1 : 0;
+                    }
+                }
+#endif
+                if (filter_weight[frame] != 0)
+                {
+                    // Construct the predictors
+                    build_predictors_mb (
+                              mbd,
+                              cpi->frames[frame]->y_buffer + mb_y_offset,
+                              cpi->frames[frame]->u_buffer + mb_uv_offset,
+                              cpi->frames[frame]->v_buffer + mb_uv_offset,
+                              cpi->frames[frame]->y_stride,
+                              mbd->block[0].bmi.mv.as_mv.row,
+                              mbd->block[0].bmi.mv.as_mv.col,
+                              predictor );
+
+                    // Apply the filter (YUV)
+                    apply_temporal_filter ( f->y_buffer + mb_y_offset,
+                                            f->y_stride,
+                                            predictor,
+                                            16,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator,
+                                            count );
+
+                    apply_temporal_filter ( f->u_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 256,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 256,
+                                            count + 256 );
+
+                    apply_temporal_filter ( f->v_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 320,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 320,
+                                            count + 320 );
+                }
+            }
+
+            // Normalize filter output to produce AltRef frame.
+            // Each pixel is (accumulator + count/2) * fixed_divide[count],
+            // shifted right by 19.
+            // NOTE(review): presumably fixed_divide[n] holds 2^19 / n, making
+            // this a rounded division by count -- confirm where the table is
+            // filled in.
+            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
+            byte = mb_y_offset;
+            for (i = 0,k = 0; i < 16; i++)
+            {
+                for (j = 0; j < 16; j++, k++)
+                {
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+
+                    dst1[byte] = (unsigned char)pval;
+
+                    // move to next pixel
+                    byte++;
+                }
+
+                byte += stride - 16;
+            }
+
+            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
+            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
+            byte = mb_uv_offset;
+            for (i = 0,k = 256; i < 8; i++)
+            {
+                for (j = 0; j < 8; j++, k++)
+                {
+                    // V sums sit 64 entries after the matching U sums
+                    int m=k+64;
+
+                    // U
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+                    dst1[byte] = (unsigned char)pval;
+
+                    // V
+                    pval = accumulator[m] + (count[m] >> 1);
+                    pval *= cpi->fixed_divide[count[m]];
+                    pval >>= 19;
+                    dst2[byte] = (unsigned char)pval;
+
+                    // move to next pixel
+                    byte++;
+                }
+
+                byte += stride - 8;
+            }
+
+            // Advance to the next macroblock in this row
+            mm_ptr++;
+            mb_y_offset += 16;
+            mb_uv_offset += 8;
+        }
+
+        // Step from the end of this macroblock row to the start of the next.
+        // NOTE(review): assumes y_width == 16 * mb_cols and
+        // uv_width == 8 * mb_cols -- confirm for unaligned frame sizes.
+        mb_y_offset += 16*f->y_stride-f->y_width;
+        mb_uv_offset += 8*f->uv_stride-f->uv_width;
+    }
+
+    // Restore input state
+    mbd->pre.y_buffer = y_buffer;
+    mbd->pre.u_buffer = u_buffer;
+    mbd->pre.v_buffer = v_buffer;
+}
+/*
+ * vp8cx_temp_filter_c
+ *
+ * Chooses which buffered source frames take part in the temporal filter,
+ * stores pointers to them in cpi->frames[] and then calls
+ * vp8cx_temp_blur1_c() on that selection.
+ *
+ * cpi : encoder instance.  Reads oxcf.arnr_strength, oxcf.arnr_type,
+ *       oxcf.lag_in_frames, active_arnr_frames, last_alt_ref_sei,
+ *       source_encode_index and src_buffer[]; overwrites cpi->frames[].
+ *
+ * Source buffers are addressed as a ring of oxcf.lag_in_frames entries:
+ * negative index differences are wrapped by adding lag_in_frames and the
+ * start index is reduced modulo lag_in_frames.
+ * NOTE(review): the modulo below assumes lag_in_frames > 0 - confirm that
+ * callers guarantee this.
+ */
+void vp8cx_temp_filter_c
+(
+    VP8_COMP *cpi
+)
+{
+    int frame = 0;
+
+    int num_frames_backward = 0;       // ring distance from source_encode_index up to last_alt_ref_sei
+    int num_frames_forward = 0;        // ring entries left over beyond last_alt_ref_sei
+    int frames_to_blur_backward = 0;   // entries used from before last_alt_ref_sei
+    int frames_to_blur_forward = 0;    // entries used from after last_alt_ref_sei
+    int frames_to_blur = 0;            // total number of frames handed to the filter
+    int start_frame = 0;               // ring index of the furthest-forward frame used
+    unsigned int filtered = 0;         // NOTE(review): never referenced again in this function
+
+    int strength = cpi->oxcf.arnr_strength;   // passed through unchanged to vp8cx_temp_blur1_c
+
+    int blur_type = cpi->oxcf.arnr_type;      // 1 = backward, 2 = forward, anything else = centered
+
+    int max_frames = cpi->active_arnr_frames; // caps frames_to_blur (for max_frames >= 1)
+
+    num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
+
+    if (num_frames_backward < 0)
+        num_frames_backward += cpi->oxcf.lag_in_frames;   // wrap around the ring
+
+    num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);   // the +1 presumably accounts for the last_alt_ref_sei entry itself - confirm
+
+    switch (blur_type)
+    {
+    case 1:
+        /////////////////////////////////////////
+        // Backward Blur: the last_alt_ref_sei entry plus entries before it only
+
+        frames_to_blur_backward = num_frames_backward;
+
+        if (frames_to_blur_backward >= max_frames)
+            frames_to_blur_backward = max_frames - 1;   // leave room for the last_alt_ref_sei entry
+
+        frames_to_blur = frames_to_blur_backward + 1;
+        break;
+
+    case 2:
+        /////////////////////////////////////////
+        // Forward Blur: the last_alt_ref_sei entry plus entries after it only
+
+        frames_to_blur_forward = num_frames_forward;
+
+        if (frames_to_blur_forward >= max_frames)
+            frames_to_blur_forward = max_frames - 1;    // leave room for the last_alt_ref_sei entry
+
+        frames_to_blur = frames_to_blur_forward + 1;
+        break;
+
+    case 3:
+    default:
+        /////////////////////////////////////////
+        // Center Blur: entries on both sides of last_alt_ref_sei
+        frames_to_blur_forward = num_frames_forward;
+        frames_to_blur_backward = num_frames_backward;
+
+        if (frames_to_blur_forward > frames_to_blur_backward)
+            frames_to_blur_forward = frames_to_blur_backward;
+
+        if (frames_to_blur_backward > frames_to_blur_forward)
+            frames_to_blur_backward = frames_to_blur_forward;   // both sides now equal min(forward, backward)
+
+        // When max_frames is even we have 1 more frame backward than forward
+        if (frames_to_blur_forward > (max_frames - 1) / 2)
+            frames_to_blur_forward = ((max_frames - 1) / 2);
+
+        if (frames_to_blur_backward > (max_frames / 2))
+            frames_to_blur_backward = (max_frames / 2);
+
+        frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
+        break;
+    }
+
+    start_frame = (cpi->last_alt_ref_sei
+                    + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;   // furthest-forward entry, wrapped into the ring
+
+#ifdef DEBUGFWG
+    // DEBUG FWG
+    printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
+           , max_frames
+           , num_frames_backward
+           , num_frames_forward
+           , frames_to_blur
+           , frames_to_blur_backward
+           , frames_to_blur_forward
+           , cpi->source_encode_index
+           , cpi->last_alt_ref_sei
+           , start_frame);
+#endif
+
+    // Setup frame pointers, NULL indicates frame not included in filter
+    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));   // clears the first max_frames slots
+    for (frame = 0; frame < frames_to_blur; frame++)
+    {
+        int which_buffer =  start_frame - frame;   // step back one ring entry per iteration
+
+        if (which_buffer < 0)
+            which_buffer += cpi->oxcf.lag_in_frames;
+
+        cpi->frames[frames_to_blur-1-frame]   // filled from the end, so frames[0] is the furthest-back entry
+                = &cpi->src_buffer[which_buffer].source_buffer;
+    }
+
+    vp8cx_temp_blur1_c (
+        cpi,
+        frames_to_blur,
+        frames_to_blur_backward,   // cpi->frames[frames_to_blur_backward] holds the last_alt_ref_sei entry (given last_alt_ref_sei < lag_in_frames)
+        strength );
+}
+#endif
diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h

new file mode 100644 (file)

index 0000000..f70e8c0
--- /dev/null
+++ b/vp8/encoder/temporal_filter.h
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef __INC_VP8_TEMPORAL_FILTER_H   // include guard; NOTE(review): identifiers starting with a double underscore are reserved for the implementation in C
+#define __INC_VP8_TEMPORAL_FILTER_H
+
+#include "onyx_int.h"   // presumably supplies the VP8_COMP type used below - confirm
+
+void vp8cx_temp_filter_c(VP8_COMP *cpi);   // fills cpi->frames[] with the source frames selected for temporal filtering and runs the blur over them
+
+#endif // __INC_VP8_TEMPORAL_FILTER_H
diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h

index d090b2d89369bbda78b9f4113295698fa86b0388..69b3edd6670ddd9ef85f1f370161a261705a3a57 100644 (file)
--- a/vp8/encoder/x86/encodemb_x86.h
+++ b/vp8/encoder/x86/encodemb_x86.h
@@ -55,7 +55,9 @@ extern prototype_submbuv(vp8_subtract_mbuv_mmx);
  extern prototype_berr(vp8_block_error_xmm);
  extern prototype_mberr(vp8_mbblock_error_xmm);
  extern prototype_mbuverr(vp8_mbuverror_xmm);
-
+extern prototype_subb(vp8_subtract_b_sse2);
+extern prototype_submby(vp8_subtract_mby_sse2);
+extern prototype_submbuv(vp8_subtract_mbuv_sse2);
  
  #if !CONFIG_RUNTIME_CPU_DETECT
  #undef  vp8_encodemb_berr
@@ -67,6 +69,15 @@ extern prototype_mbuverr(vp8_mbuverror_xmm);
  #undef  vp8_encodemb_mbuverr
  #define vp8_encodemb_mbuverr vp8_mbuverror_xmm
  
+#undef  vp8_encodemb_subb
+#define vp8_encodemb_subb vp8_subtract_b_sse2
+
+#undef  vp8_encodemb_submby
+#define vp8_encodemb_submby vp8_subtract_mby_sse2
+
+#undef  vp8_encodemb_submbuv
+#define vp8_encodemb_submbuv vp8_subtract_mbuv_sse2
+
  #endif
  #endif
  
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm

index 38812c8d11f3e56818f735684effbf23ab42964f..ffc9b3dcae66ee31438a7f6b97c2599ce3d6ebea 100644 (file)
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -21,94 +21,122 @@ sym(vp8_short_walsh4x4_sse2):
      push        rdi
      ; end prolog
  
-    mov     rsi, arg(0)
-    mov     rdi, arg(1)
-
-    movdqu    xmm4, [rsi + 0]       ;ip[4] ip[0]
-    movdqu    xmm0, [rsi + 16]      ;ip[12] ip[8]
-
-    pxor  xmm7, xmm7
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    ; 13 12 11 10 03 02 01 00
-    ;
-    ; 33 32 31 30 23 22 21 20
-    ;
-    movdqa    xmm3, xmm4          ; 13 12 11 10 03 02 01 00
-    punpcklwd xmm4, xmm0          ; 23 03 22 02 21 01 20 00
-    punpckhwd xmm3, xmm0          ; 33 13 32 12 31 11 30 10
-    movdqa    xmm1, xmm4          ; 23 03 22 02 21 01 20 00
-    punpcklwd xmm4, xmm3          ; 31 21 11 01 30 20 10 00
-    punpckhwd xmm1, xmm3          ; 33 23 13 03 32 22 12 02
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    pshufd    xmm2, xmm1, 4eh       ;ip[8] ip[12]
-    movdqa    xmm3, xmm4          ;ip[4] ip[0]
-
-    paddw   xmm4, xmm2          ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
-    psubw   xmm3, xmm2          ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
-
+    mov     rsi, arg(0)           ; input
+    mov     rdi, arg(1)           ; output
+    movsxd  rdx, dword ptr arg(2) ; pitch
+
+    ; first for loop
+    movq    xmm0, MMWORD PTR [rsi]           ; load input
+    movq    xmm1, MMWORD PTR [rsi + rdx]
+    lea     rsi,  [rsi + rdx*2]
+    movq    xmm2, MMWORD PTR [rsi]
+    movq    xmm3, MMWORD PTR [rsi + rdx]
+
+    punpcklwd xmm0,  xmm1
+    punpcklwd xmm2,  xmm3
+
+    movdqa    xmm1, xmm0
+    punpckldq xmm0, xmm2           ; ip[1] ip[0]
+    punpckhdq xmm1, xmm2           ; ip[3] ip[2]
+
+    movdqa    xmm2, xmm0
+    paddw     xmm0, xmm1
+    psubw     xmm2, xmm1
+
+    psllw     xmm0, 2              ; d1  a1
+    psllw     xmm2, 2              ; c1  b1
+
+    movdqa    xmm1, xmm0
+    punpcklqdq xmm0, xmm2          ; b1  a1
+    punpckhqdq xmm1, xmm2          ; c1  d1
+
+    pxor      xmm6, xmm6
+    movq      xmm6, xmm0
+    pxor      xmm7, xmm7
+    pcmpeqw   xmm7, xmm6
+    paddw     xmm7, [GLOBAL(c1)]
+
+    movdqa    xmm2, xmm0
+    paddw     xmm0, xmm1           ; b1+c1  a1+d1
+    psubw     xmm2, xmm1           ; b1-c1  a1-d1
+    paddw     xmm0, xmm7           ; b1+c1  a1+d1+(a1!=0)
+
+    ; second for loop
+    ; input: 13  9  5  1 12  8  4  0 (xmm0)
+    ;        14 10  6  2 15 11  7  3 (xmm2)
+    ; after shuffle:
+    ;        13  5  9  1 12  4  8  0 (xmm0)
+    ;        14  6 10  2 15  7 11  3 (xmm1)
+    pshuflw   xmm3, xmm0, 0xd8
+    pshufhw   xmm0, xmm3, 0xd8
+    pshuflw   xmm3, xmm2, 0xd8
+    pshufhw   xmm1, xmm3, 0xd8
+
+    movdqa    xmm2, xmm0
+    pmaddwd   xmm0, [GLOBAL(c1)]    ; d11 a11 d10 a10
+    pmaddwd   xmm2, [GLOBAL(cn1)]   ; c11 b11 c10 b10
+    movdqa    xmm3, xmm1
+    pmaddwd   xmm1, [GLOBAL(c1)]    ; d12 a12 d13 a13
+    pmaddwd   xmm3, [GLOBAL(cn1)]   ; c12 b12 c13 b13
+
+    pshufd    xmm4, xmm0, 0xd8      ; d11 d10 a11 a10
+    pshufd    xmm5, xmm2, 0xd8      ; c11 c10 b11 b10
+    pshufd    xmm6, xmm1, 0x72      ; d13 d12 a13 a12
+    pshufd    xmm7, xmm3, 0x72      ; c13 c12 b13 b12
+
+    movdqa    xmm0, xmm4
+    punpcklqdq xmm0, xmm5           ; b11 b10 a11 a10
+    punpckhqdq xmm4, xmm5           ; c11 c10 d11 d10
+    movdqa    xmm1, xmm6
+    punpcklqdq xmm1, xmm7           ; b13 b12 a13 a12
+    punpckhqdq xmm6, xmm7           ; c13 c12 d13 d12
+
+    movdqa    xmm2, xmm0
+    paddd     xmm0, xmm4            ; b21 b20 a21 a20
+    psubd     xmm2, xmm4            ; c21 c20 d21 d20
+    movdqa    xmm3, xmm1
+    paddd     xmm1, xmm6            ; b23 b22 a23 a22
+    psubd     xmm3, xmm6            ; c23 c22 d23 d22
+
+    pxor      xmm4, xmm4
      movdqa    xmm5, xmm4
-    punpcklqdq  xmm4, xmm3          ;d1 a1
-    punpckhqdq  xmm5, xmm3          ;c1 b1
-
-    movdqa    xmm1, xmm5          ;c1 b1
-    paddw   xmm5, xmm4          ;dl+cl a1+b1 aka op[4] op[0]
-    psubw   xmm4, xmm1          ;d1-c1 a1-b1 aka op[12] op[8]
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    ; 13 12 11 10 03 02 01 00
-    ;
-    ; 33 32 31 30 23 22 21 20
-    ;
-    movdqa    xmm0, xmm5          ; 13 12 11 10 03 02 01 00
-    punpcklwd xmm5, xmm4          ; 23 03 22 02 21 01 20 00
-    punpckhwd xmm0, xmm4          ; 33 13 32 12 31 11 30 10
-    movdqa    xmm1, xmm5          ; 23 03 22 02 21 01 20 00
-    punpcklwd xmm5, xmm0          ; 31 21 11 01 30 20 10 00
-    punpckhwd xmm1, xmm0          ; 33 23 13 03 32 22 12 02
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    pshufd    xmm2, xmm1, 4eh       ;ip[8] ip[12]
-    movdqa    xmm3, xmm5          ;ip[4] ip[0]
-
-    paddw   xmm5, xmm2          ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
-    psubw   xmm3, xmm2          ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
-
-    movdqa    xmm6, xmm5
-    punpcklqdq  xmm5, xmm3          ;d1 a1
-    punpckhqdq  xmm6, xmm3          ;c1 b1
-
-    movdqa    xmm1, xmm6          ;c1 b1
-    paddw   xmm6, xmm5          ;dl+cl a1+b1 aka op[4] op[0]
-    psubw   xmm5, xmm1          ;d1-c1 a1-b1 aka op[12] op[8]
-
-    movdqa    xmm0, xmm6          ;aka b2 a2
-    movdqa    xmm1, xmm5          ;aka d2 c2
-
-    pcmpgtw   xmm0, xmm7
-    pcmpgtw   xmm1, xmm7
-
-    psrlw   xmm0, 15
-    psrlw   xmm1, 15
-
-    paddw   xmm6, xmm0
-    paddw   xmm5, xmm1
-
-    psraw   xmm6, 1
-    psraw   xmm5, 1
-
-    ;   a2 = a1 + b1;
-    ;   b2 = c1 + d1;
-    ;   c2 = a1 - b1;
-    ;   d2 = d1 - c1;
-    ;        a2 += (a2>0);
-    ;        b2 += (b2>0);
-    ;        c2 += (c2>0);
-    ;        d2 += (d2>0);
-    ;   op[0] = (a2)>>1;
-    ;   op[4] = (b2)>>1;
-    ;   op[8] = (c2)>>1;
-    ;   op[12]= (d2)>>1;
-
-    movdqu  [rdi + 0], xmm6
-    movdqu  [rdi + 16], xmm5
+    pcmpgtd   xmm4, xmm0
+    pcmpgtd   xmm5, xmm2
+    pand      xmm4, [GLOBAL(cd1)]
+    pand      xmm5, [GLOBAL(cd1)]
+
+    pxor      xmm6, xmm6
+    movdqa    xmm7, xmm6
+    pcmpgtd   xmm6, xmm1
+    pcmpgtd   xmm7, xmm3
+    pand      xmm6, [GLOBAL(cd1)]
+    pand      xmm7, [GLOBAL(cd1)]
+
+    paddd     xmm0, xmm4
+    paddd     xmm2, xmm5
+    paddd     xmm0, [GLOBAL(cd3)]
+    paddd     xmm2, [GLOBAL(cd3)]
+    paddd     xmm1, xmm6
+    paddd     xmm3, xmm7
+    paddd     xmm1, [GLOBAL(cd3)]
+    paddd     xmm3, [GLOBAL(cd3)]
+
+    psrad     xmm0, 3
+    psrad     xmm1, 3
+    psrad     xmm2, 3
+    psrad     xmm3, 3
+    movdqa    xmm4, xmm0
+    punpcklqdq xmm0, xmm1           ; a23 a22 a21 a20
+    punpckhqdq xmm4, xmm1           ; b23 b22 b21 b20
+    movdqa    xmm5, xmm2
+    punpckhqdq xmm2, xmm3           ; c23 c22 c21 c20
+    punpcklqdq xmm5, xmm3           ; d23 d22 d21 d20
+
+    packssdw  xmm0, xmm4            ; b23 b22 b21 b20 a23 a22 a21 a20
+    packssdw  xmm2, xmm5            ; d23 d22 d21 d20 c23 c22 c21 c20
+
+    movdqa  XMMWORD PTR [rdi], xmm0
+    movdqa  XMMWORD PTR [rdi + 16], xmm2
  
      ; begin epilog
      pop rdi
@@ -116,3 +144,17 @@ sym(vp8_short_walsh4x4_sse2):
      UNSHADOW_ARGS
      pop         rbp
      ret
+
+SECTION_RODATA
+align 16
+c1:     ; eight words of +1: added to a pcmpeqw mask to get (a1 != 0), and used with pmaddwd to sum adjacent word pairs
+    dw 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001
+align 16
+cn1:    ; words alternating +1, -1 (0xffff as a signed word): with pmaddwd gives even word minus odd word per dword
+    dw 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff
+align 16
+cd1:    ; four dwords of 1: pand'ed with the pcmpgtd masks to yield 1 where the value is negative
+    dd 0x00000001, 0x00000001, 0x00000001, 0x00000001
+align 16
+cd3:    ; four dwords of 3: bias added before the arithmetic shift right by 3
+    dd 0x00000003, 0x00000003, 0x00000003, 0x00000003
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm

index 3248813372ec2c5938ad99cd12d59674328869ef..1e0bd5c48e487e1b2ce9f8470c807a44e46b9493 100644 (file)
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -258,8 +258,8 @@ rq_zigzag_1c:
  ;                           short *qcoeff_ptr,short *dequant_ptr,
  ;                           short *scan_mask, short *round_ptr,
  ;                           short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp8_fast_quantize_b_impl_ssse2)
-sym(vp8_fast_quantize_b_impl_ssse2):
+global sym(vp8_fast_quantize_b_impl_sse2)
+sym(vp8_fast_quantize_b_impl_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm

index 19041d49f2ea30e7619147e6ce90eab2f9e08b14..85cb023a48771c3165d924497d8e553a96f6186f 100644 (file)
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -17,8 +17,6 @@ global sym(vp8_sad8x8_mmx)
  global sym(vp8_sad4x4_mmx)
  global sym(vp8_sad16x8_mmx)
  
-%idefine QWORD
-
  ;unsigned int vp8_sad16x16_mmx(
  ;    unsigned char *src_ptr,
  ;    int  src_stride,
@@ -272,11 +270,11 @@ sym(vp8_sad4x4_mmx):
          movsxd          rax,        dword ptr arg(1) ;src_stride
          movsxd          rdx,        dword ptr arg(3) ;ref_stride
  
-        movd            mm0,       QWORD PTR [rsi]
-        movd            mm1,       QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rdi]
  
-        movd            mm2,       QWORD PTR [rsi+rax]
-        movd            mm3,       QWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rdi+rdx]
  
          punpcklbw       mm0,        mm2
          punpcklbw       mm1,        mm3
@@ -298,11 +296,11 @@ sym(vp8_sad4x4_mmx):
          lea             rsi,        [rsi+rax*2]
          lea             rdi,        [rdi+rdx*2]
  
-        movd            mm4,       QWORD PTR [rsi]
-        movd            mm5,       QWORD PTR [rdi]
+        movd            mm4,        DWORD PTR [rsi]
+        movd            mm5,        DWORD PTR [rdi]
  
-        movd            mm6,       QWORD PTR [rsi+rax]
-        movd            mm7,       QWORD PTR [rdi+rdx]
+        movd            mm6,        DWORD PTR [rsi+rax]
+        movd            mm7,        DWORD PTR [rdi+rdx]
  
          punpcklbw       mm4,        mm6
          punpcklbw       mm5,        mm7
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm

index 0f6c5d9c4d313bbce7d412bbc0dc3c77d3a77718..39ed796049bc0b5c01dbdd0b99e72106f20081c3 100644 (file)
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -11,8 +11,6 @@
  
  %include "vpx_ports/x86_abi_support.asm"
  
-%idefine QWORD
-
  ;unsigned int vp8_sad16x16_wmt(
  ;    unsigned char *src_ptr,
  ;    int  src_stride,
@@ -221,11 +219,11 @@ sym(vp8_sad4x4_wmt):
          movsxd          rax,        dword ptr arg(1) ;src_stride
          movsxd          rdx,        dword ptr arg(3) ;ref_stride
  
-        movd            mm0,       QWORD PTR [rsi]
-        movd            mm1,       QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rdi]
  
-        movd            mm2,       QWORD PTR [rsi+rax]
-        movd            mm3,       QWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rdi+rdx]
  
          punpcklbw       mm0,        mm2
          punpcklbw       mm1,        mm3
@@ -234,12 +232,12 @@ sym(vp8_sad4x4_wmt):
          lea             rsi,        [rsi+rax*2]
  
          lea             rdi,        [rdi+rdx*2]
-        movd            mm4,       QWORD PTR [rsi]
+        movd            mm4,        DWORD PTR [rsi]
  
-        movd            mm5,       QWORD PTR [rdi]
-        movd            mm6,       QWORD PTR [rsi+rax]
+        movd            mm5,        DWORD PTR [rdi]
+        movd            mm6,        DWORD PTR [rsi+rax]
  
-        movd            mm7,       QWORD PTR [rdi+rdx]
+        movd            mm7,        DWORD PTR [rdi+rdx]
          punpcklbw       mm4,        mm6
  
          punpcklbw       mm5,        mm7
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm

index b12c81562d2963a26a294df195a0d9b473e2d1a1..1b7293c20f3cf57fe3c8964f068647f39c098f04 100644 (file)
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -11,23 +11,21 @@
  
  %include "vpx_ports/x86_abi_support.asm"
  
-%idefine QWORD
-
  %macro PROCESS_16X2X3 1
  %if %1
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm5,       [rdi]
-        lddqu           xmm6,       [rdi+1]
-        lddqu           xmm7,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm5,       XMMWORD PTR [rdi]
+        lddqu           xmm6,       XMMWORD PTR [rdi+1]
+        lddqu           xmm7,       XMMWORD PTR [rdi+2]
  
          psadbw          xmm5,       xmm0
          psadbw          xmm6,       xmm0
          psadbw          xmm7,       xmm0
  %else
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm1,       [rdi]
-        lddqu           xmm2,       [rdi+1]
-        lddqu           xmm3,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
+        lddqu           xmm2,       XMMWORD PTR [rdi+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+2]
  
          psadbw          xmm1,       xmm0
          psadbw          xmm2,       xmm0
@@ -37,10 +35,10 @@
          paddw           xmm6,       xmm2
          paddw           xmm7,       xmm3
  %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        lddqu           xmm1,       QWORD PTR [rdi+rdx]
-        lddqu           xmm2,       QWORD PTR [rdi+rdx+1]
-        lddqu           xmm3,       QWORD PTR [rdi+rdx+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
+        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]
  
          lea             rsi,        [rsi+rax*2]
          lea             rdi,        [rdi+rdx*2]
@@ -56,19 +54,19 @@
  
  %macro PROCESS_8X2X3 1
  %if %1
-        movq            mm0,       [rsi]
-        movq            mm5,       [rdi]
-        movq            mm6,       [rdi+1]
-        movq            mm7,       [rdi+2]
+        movq            mm0,       QWORD PTR [rsi]
+        movq            mm5,       QWORD PTR [rdi]
+        movq            mm6,       QWORD PTR [rdi+1]
+        movq            mm7,       QWORD PTR [rdi+2]
  
          psadbw          mm5,       mm0
          psadbw          mm6,       mm0
          psadbw          mm7,       mm0
  %else
-        movq            mm0,       [rsi]
-        movq            mm1,       [rdi]
-        movq            mm2,       [rdi+1]
-        movq            mm3,       [rdi+2]
+        movq            mm0,       QWORD PTR [rsi]
+        movq            mm1,       QWORD PTR [rdi]
+        movq            mm2,       QWORD PTR [rdi+1]
+        movq            mm3,       QWORD PTR [rdi+2]
  
          psadbw          mm1,       mm0
          psadbw          mm2,       mm0
@@ -105,45 +103,45 @@
  
  %macro PROCESS_16X2X4 1
  %if %1
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm4,       [rcx]
-        lddqu           xmm5,       [rdx]
-        lddqu           xmm6,       [rbx]
-        lddqu           xmm7,       [rdi]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm4,       XMMWORD PTR [rcx]
+        lddqu           xmm5,       XMMWORD PTR [rdx]
+        lddqu           xmm6,       XMMWORD PTR [rbx]
+        lddqu           xmm7,       XMMWORD PTR [rdi]
  
          psadbw          xmm4,       xmm0
          psadbw          xmm5,       xmm0
          psadbw          xmm6,       xmm0
          psadbw          xmm7,       xmm0
  %else
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm1,       [rcx]
-        lddqu           xmm2,       [rdx]
-        lddqu           xmm3,       [rbx]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rcx]
+        lddqu           xmm2,       XMMWORD PTR [rdx]
+        lddqu           xmm3,       XMMWORD PTR [rbx]
  
          psadbw          xmm1,       xmm0
          psadbw          xmm2,       xmm0
          psadbw          xmm3,       xmm0
  
          paddw           xmm4,       xmm1
-        lddqu           xmm1,       [rdi]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
          paddw           xmm5,       xmm2
          paddw           xmm6,       xmm3
  
          psadbw          xmm1,       xmm0
          paddw           xmm7,       xmm1
  %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        lddqu           xmm1,       QWORD PTR [rcx+rbp]
-        lddqu           xmm2,       QWORD PTR [rdx+rbp]
-        lddqu           xmm3,       QWORD PTR [rbx+rbp]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rcx+rbp]
+        lddqu           xmm2,       XMMWORD PTR [rdx+rbp]
+        lddqu           xmm3,       XMMWORD PTR [rbx+rbp]
  
          psadbw          xmm1,       xmm0
          psadbw          xmm2,       xmm0
          psadbw          xmm3,       xmm0
  
          paddw           xmm4,       xmm1
-        lddqu           xmm1,       QWORD PTR [rdi+rbp]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rbp]
          paddw           xmm5,       xmm2
          paddw           xmm6,       xmm3
  
@@ -162,28 +160,28 @@
  
  %macro PROCESS_8X2X4 1
  %if %1
-        movq            mm0,        [rsi]
-        movq            mm4,        [rcx]
-        movq            mm5,        [rdx]
-        movq            mm6,        [rbx]
-        movq            mm7,        [rdi]
+        movq            mm0,        QWORD PTR [rsi]
+        movq            mm4,        QWORD PTR [rcx]
+        movq            mm5,        QWORD PTR [rdx]
+        movq            mm6,        QWORD PTR [rbx]
+        movq            mm7,        QWORD PTR [rdi]
  
          psadbw          mm4,        mm0
          psadbw          mm5,        mm0
          psadbw          mm6,        mm0
          psadbw          mm7,        mm0
  %else
-        movq            mm0,        [rsi]
-        movq            mm1,        [rcx]
-        movq            mm2,        [rdx]
-        movq            mm3,        [rbx]
+        movq            mm0,        QWORD PTR [rsi]
+        movq            mm1,        QWORD PTR [rcx]
+        movq            mm2,        QWORD PTR [rdx]
+        movq            mm3,        QWORD PTR [rbx]
  
          psadbw          mm1,        mm0
          psadbw          mm2,        mm0
          psadbw          mm3,        mm0
  
          paddw           mm4,        mm1
-        movq            mm1,        [rdi]
+        movq            mm1,        QWORD PTR [rdi]
          paddw           mm5,        mm2
          paddw           mm6,        mm3
  
@@ -430,20 +428,20 @@ sym(vp8_sad4x4x3_sse3):
          movsxd          rax,        dword ptr arg(1) ;src_stride
          movsxd          rdx,        dword ptr arg(3) ;ref_stride
  
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm1,        QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rdi]
  
-        movd            mm2,        QWORD PTR [rsi+rax]
-        movd            mm3,        QWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rdi+rdx]
  
          punpcklbw       mm0,        mm2
          punpcklbw       mm1,        mm3
  
-        movd            mm4,        QWORD PTR [rdi+1]
-        movd            mm5,        QWORD PTR [rdi+2]
+        movd            mm4,        DWORD PTR [rdi+1]
+        movd            mm5,        DWORD PTR [rdi+2]
  
-        movd            mm2,        QWORD PTR [rdi+rdx+1]
-        movd            mm3,        QWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [rdi+rdx+1]
+        movd            mm3,        DWORD PTR [rdi+rdx+2]
  
          psadbw          mm1,        mm0
  
@@ -458,24 +456,24 @@ sym(vp8_sad4x4x3_sse3):
          lea             rsi,        [rsi+rax*2]
          lea             rdi,        [rdi+rdx*2]
  
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm2,        QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm2,        DWORD PTR [rdi]
  
-        movd            mm3,        QWORD PTR [rsi+rax]
-        movd            mm6,        QWORD PTR [rdi+rdx]
+        movd            mm3,        DWORD PTR [rsi+rax]
+        movd            mm6,        DWORD PTR [rdi+rdx]
  
          punpcklbw       mm0,        mm3
          punpcklbw       mm2,        mm6
  
-        movd            mm3,        QWORD PTR [rdi+1]
-        movd            mm7,        QWORD PTR [rdi+2]
+        movd            mm3,        DWORD PTR [rdi+1]
+        movd            mm7,        DWORD PTR [rdi+2]
  
          psadbw          mm2,        mm0
  
          paddw           mm1,        mm2
  
-        movd            mm2,        QWORD PTR [rdi+rdx+1]
-        movd            mm6,        QWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [rdi+rdx+1]
+        movd            mm6,        DWORD PTR [rdi+rdx+2]
  
          punpcklbw       mm3,        mm2
          punpcklbw       mm7,        mm6
@@ -846,23 +844,23 @@ sym(vp8_sad4x4x4d_sse3):
  
          xchg            rbx,        rax
  
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm1,        QWORD PTR [rcx]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rcx]
  
-        movd            mm2,        QWORD PTR [rsi+rax]
-        movd            mm3,        QWORD PTR [rcx+rbp]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rcx+rbp]
  
          punpcklbw       mm0,        mm2
          punpcklbw       mm1,        mm3
  
-        movd            mm4,        QWORD PTR [rdx]
-        movd            mm5,        QWORD PTR [rbx]
+        movd            mm4,        DWORD PTR [rdx]
+        movd            mm5,        DWORD PTR [rbx]
  
-        movd            mm6,        QWORD PTR [rdi]
-        movd            mm2,        QWORD PTR [rdx+rbp]
+        movd            mm6,        DWORD PTR [rdi]
+        movd            mm2,        DWORD PTR [rdx+rbp]
  
-        movd            mm3,        QWORD PTR [rbx+rbp]
-        movd            mm7,        QWORD PTR [rdi+rbp]
+        movd            mm3,        DWORD PTR [rbx+rbp]
+        movd            mm7,        DWORD PTR [rdi+rbp]
  
          psadbw          mm1,        mm0
  
@@ -885,17 +883,17 @@ sym(vp8_sad4x4x4d_sse3):
  
          lea             rdi,        [rdi+rbp*2]
  
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm2,        QWORD PTR [rcx]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm2,        DWORD PTR [rcx]
  
-        movd            mm3,        QWORD PTR [rsi+rax]
-        movd            mm7,        QWORD PTR [rcx+rbp]
+        movd            mm3,        DWORD PTR [rsi+rax]
+        movd            mm7,        DWORD PTR [rcx+rbp]
  
          punpcklbw       mm0,        mm3
          punpcklbw       mm2,        mm7
  
-        movd            mm3,        QWORD PTR [rdx]
-        movd            mm7,        QWORD PTR [rbx]
+        movd            mm3,        DWORD PTR [rdx]
+        movd            mm7,        DWORD PTR [rbx]
  
          psadbw          mm2,        mm0
          mov             rax,        rbp
@@ -906,8 +904,8 @@ sym(vp8_sad4x4x4d_sse3):
          paddw           mm1,        mm2
          movd            [rsi],      mm1
  
-        movd            mm2,        QWORD PTR [rdx+rax]
-        movd            mm1,        QWORD PTR [rbx+rax]
+        movd            mm2,        DWORD PTR [rdx+rax]
+        movd            mm1,        DWORD PTR [rbx+rax]
  
          punpcklbw       mm3,        mm2
          punpcklbw       mm7,        mm1
@@ -915,8 +913,8 @@ sym(vp8_sad4x4x4d_sse3):
          psadbw          mm3,        mm0
          psadbw          mm7,        mm0
  
-        movd            mm2,        QWORD PTR [rdi]
-        movd            mm1,        QWORD PTR [rdi+rax]
+        movd            mm2,        DWORD PTR [rdi]
+        movd            mm1,        DWORD PTR [rdi+rax]
  
          paddw           mm3,        mm4
          paddw           mm7,        mm5
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm

index 94bbfffbc5dc02071c341d1d1c00cc3251e7cc5b..69c5eaedc795ae7d5886f4e984e50ba18adf8c81 100644 (file)
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -11,23 +11,21 @@
  
  %include "vpx_ports/x86_abi_support.asm"
  
-%idefine QWORD
-
  %macro PROCESS_16X2X3 1
  %if %1
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm5,       [rdi]
-        lddqu           xmm6,       [rdi+1]
-        lddqu           xmm7,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm5,       XMMWORD PTR [rdi]
+        lddqu           xmm6,       XMMWORD PTR [rdi+1]
+        lddqu           xmm7,       XMMWORD PTR [rdi+2]
  
          psadbw          xmm5,       xmm0
          psadbw          xmm6,       xmm0
          psadbw          xmm7,       xmm0
  %else
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm1,       [rdi]
-        lddqu           xmm2,       [rdi+1]
-        lddqu           xmm3,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
+        lddqu           xmm2,       XMMWORD PTR [rdi+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+2]
  
          psadbw          xmm1,       xmm0
          psadbw          xmm2,       xmm0
@@ -37,10 +35,10 @@
          paddw           xmm6,       xmm2
          paddw           xmm7,       xmm3
  %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        lddqu           xmm1,       QWORD PTR [rdi+rdx]
-        lddqu           xmm2,       QWORD PTR [rdi+rdx+1]
-        lddqu           xmm3,       QWORD PTR [rdi+rdx+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
+        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]
  
          lea             rsi,        [rsi+rax*2]
          lea             rdi,        [rdi+rdx*2]
@@ -56,9 +54,9 @@
  
  %macro PROCESS_16X2X3_OFFSET 2
  %if %1
-        movdqa          xmm0,       [rsi]
-        movdqa          xmm4,       [rdi]
-        movdqa          xmm7,       [rdi+16]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        movdqa          xmm4,       XMMWORD PTR [rdi]
+        movdqa          xmm7,       XMMWORD PTR [rdi+16]
  
          movdqa          xmm5,       xmm7
          palignr         xmm5,       xmm4,       %2
@@ -72,9 +70,9 @@
          psadbw          xmm6,       xmm0
          psadbw          xmm7,       xmm0
  %else
-        movdqa          xmm0,       [rsi]
-        movdqa          xmm4,       [rdi]
-        movdqa          xmm3,       [rdi+16]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        movdqa          xmm4,       XMMWORD PTR [rdi]
+        movdqa          xmm3,       XMMWORD PTR [rdi+16]
  
          movdqa          xmm1,       xmm3
          palignr         xmm1,       xmm4,       %2
@@ -92,9 +90,9 @@
          paddw           xmm6,       xmm2
          paddw           xmm7,       xmm3
  %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        movdqa          xmm4,       QWORD PTR [rdi+rdx]
-        movdqa          xmm3,       QWORD PTR [rdi+rdx+16]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        movdqa          xmm4,       XMMWORD PTR [rdi+rdx]
+        movdqa          xmm3,       XMMWORD PTR [rdi+rdx+16]
  
          movdqa          xmm1,       xmm3
          palignr         xmm1,       xmm4,       %2
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm

index 8fe3ee174f0e8a934710375ad2066af17f0e41fd..a47e1f0d6ef7ac2c9aca5f0f458ea94179a957dc 100644 (file)
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -12,7 +12,7 @@
  %include "vpx_ports/x86_abi_support.asm"
  
  ;void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
-;                            unsigned short *diff, unsigned char *Predictor,
+;                            short *diff, unsigned char *Predictor,
  ;                            int pitch);
  global sym(vp8_subtract_b_mmx_impl)
  sym(vp8_subtract_b_mmx_impl):
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm

new file mode 100644 (file)

index 0000000..60522ba
--- /dev/null
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -0,0 +1,348 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
+;                            short *diff, unsigned char *Predictor,
+;                            int pitch);
+;
+; Writes a 4x4 block of 16-bit differences: diff = z - Predictor.
+;   z          source pixels, rows src_stride bytes apart
+;   Predictor  predicted pixels, rows pitch bytes apart
+;   diff       output, rows pitch 16-bit entries (pitch*2 bytes) apart
+; Each row loads 4 bytes from both inputs, widens them to words by
+; interleaving with zero, subtracts, and stores 4 words (8 bytes).
+; NOTE: despite the _sse2 name this routine only touches the MMX registers
+; mm0, mm1 and mm7, and it does not execute emms before returning.
+global sym(vp8_subtract_b_sse2_impl)
+sym(vp8_subtract_b_sse2_impl):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push rsi
+    push rdi
+    ; end prolog
+
+        mov     rdi,        arg(2) ;diff
+        mov     rax,        arg(3) ;Predictor
+        mov     rsi,        arg(0) ;z
+        movsxd  rdx,        dword ptr arg(1);src_stride;
+        movsxd  rcx,        dword ptr arg(4);pitch
+        pxor    mm7,        mm7             ; zero, used to widen bytes to words
+
+        ; row 0
+        movd    mm0,        [rsi]
+        movd    mm1,        [rax]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    MMWORD PTR [rdi],      mm0
+
+        ; row 1: z + src_stride, Predictor + pitch, diff + pitch words
+        movd    mm0,        [rsi+rdx]
+        movd    mm1,        [rax+rcx]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    MMWORD PTR [rdi+rcx*2], mm0
+
+        ; row 2: z + 2*src_stride, Predictor + 2*pitch, diff + 2*pitch words
+        movd    mm0,        [rsi+rdx*2]
+        movd    mm1,        [rax+rcx*2]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    MMWORD PTR [rdi+rcx*4], mm0
+
+        ; Rebase for row 3: rsi = z + 2*src_stride and rcx = 3*pitch, so the
+        ; addresses below are z + 3*src_stride, Predictor + 3*pitch and
+        ; diff + 3*pitch words (rcx*2 bytes).
+        lea     rsi,        [rsi+rdx*2]
+        lea     rcx,        [rcx+rcx*2]
+
+        ; row 3
+        movd    mm0,        [rsi+rdx]
+        movd    mm1,        [rax+rcx]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    MMWORD PTR [rdi+rcx*2], mm0
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
+;
+; Writes a 16x16 block of 16-bit differences: diff = src - pred.
+;   diff    output, 16 words (32 bytes) per row, rows stored back to back
+;   src     source pixels, rows stride bytes apart
+;   pred    predicted pixels, 16 bytes per row, rows stored back to back
+; All three pointers are accessed with movdqa and therefore must be 16-byte
+; aligned (stride must keep every src row aligned as well).
+;
+; Method: psubb yields the low byte of each difference.  The high byte is
+; 0xFF when pred > src (unsigned) and 0x00 otherwise; it is obtained by
+; biasing both operands with 0x80 (t80) so the signed pcmpgtb compare acts
+; as an unsigned compare.  Interleaving low bytes with that mask gives the
+; sign-extended 16-bit result.
+;
+; Only xmm0-xmm3 are used, so no callee-saved XMM register (xmm6+ under the
+; Win64 calling convention) is modified.  GET_GOT/RESTORE_GOT set up the
+; base register that GLOBAL() needs in position-independent 32-bit builds.
+global sym(vp8_subtract_mby_sse2)
+sym(vp8_subtract_mby_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    GET_GOT     rbx
+    push rsi
+    push rdi
+    ; end prolog
+
+            mov         rsi,            arg(1) ;src
+            mov         rdi,            arg(0) ;diff
+
+            mov         rax,            arg(2) ;pred
+            movsxd      rdx,            dword ptr arg(3) ;stride
+
+            mov         rcx,            8      ; do two lines at one time
+
+submby_loop:
+            ; first row of the pair
+            movdqa      xmm0,           XMMWORD PTR [rsi]   ; src
+            movdqa      xmm1,           XMMWORD PTR [rax]   ; pred
+
+            movdqa      xmm2,           xmm0
+            psubb       xmm0,           xmm1            ; low byte of src - pred
+
+            pxor        xmm1,           [GLOBAL(t80)]   ;convert to signed values
+            pxor        xmm2,           [GLOBAL(t80)]
+            pcmpgtb     xmm1,           xmm2            ; obtain sign information
+
+            movdqa      xmm2,    xmm0
+            movdqa      xmm3,    xmm1
+            punpcklbw   xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw   xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa      XMMWORD PTR [rdi],   xmm0
+            movdqa      XMMWORD PTR [rdi +16], xmm2
+
+            ; second row of the pair; xmm0-xmm3 are free again because the
+            ; first row has already been stored
+            movdqa      xmm0,           XMMWORD PTR [rsi + rdx]
+            movdqa      xmm1,           XMMWORD PTR [rax + 16]
+
+            movdqa      xmm2,           xmm0
+            psubb       xmm0,           xmm1            ; low byte of src - pred
+
+            pxor        xmm1,           [GLOBAL(t80)]   ;convert to signed values
+            pxor        xmm2,           [GLOBAL(t80)]
+            pcmpgtb     xmm1,           xmm2            ; obtain sign information
+
+            movdqa      xmm2,    xmm0
+            movdqa      xmm3,    xmm1
+            punpcklbw   xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw   xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa      XMMWORD PTR [rdi +32], xmm0
+            movdqa      XMMWORD PTR [rdi +48], xmm2
+
+            add         rdi,            64              ; two rows of 16 words
+            add         rax,            32              ; two rows of 16 bytes
+            lea         rsi,            [rsi+rdx*2]
+
+            sub         rcx,            1
+            jnz         submby_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+;
+; Writes two 8x8 blocks of 16-bit differences:
+;   diff[256..319] = usrc - pred[256..319]
+;   diff[320..383] = vsrc - pred[320..383]
+; usrc/vsrc rows are stride bytes apart; pred holds 8 bytes per row and diff
+; holds 8 words per row, both stored back to back.  pred and diff are
+; accessed with movdqa, so both must be 16-byte aligned.
+;
+; Two source rows are packed into one XMM register per step.  psubb yields
+; the low byte of each difference; the high byte (0xFF when pred > src,
+; else 0x00) comes from a pcmpgtb on operands biased by 0x80 (t80).
+;
+; GET_GOT/RESTORE_GOT set up the base register that GLOBAL() needs in
+; position-independent 32-bit builds.
+global sym(vp8_subtract_mbuv_sse2)
+sym(vp8_subtract_mbuv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    GET_GOT     rbx
+    push rsi
+    push rdi
+    ; end prolog
+
+            mov     rdi,        arg(0) ;diff
+            mov     rax,        arg(3) ;pred
+            mov     rsi,        arg(1) ;z = usrc
+            add     rdi,        256*2  ;diff = diff + 256 (shorts)
+            add     rax,        256    ;Predictor = pred + 256
+            movsxd  rdx,        dword ptr arg(4) ;stride;
+            lea     rcx,        [rdx + rdx*2]   ; rcx = 3 * stride
+
+            ;u
+            ;line 0 1
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi],   xmm0
+            movdqa     XMMWORD PTR [rdi +16],   xmm2
+
+            ;line 2 3
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+16]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi + 32],   xmm0
+            movdqa     XMMWORD PTR [rdi + 48],   xmm2
+
+            ;line 4 5
+            lea        rsi,     [rsi + rdx*4]
+
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax + 32]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi + 64],   xmm0
+            movdqa     XMMWORD PTR [rdi + 80],   xmm2
+
+            ;line 6 7
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+ 48]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi + 96],   xmm0
+            movdqa     XMMWORD PTR [rdi + 112],  xmm2
+
+            ;v
+            mov     rsi,        arg(2) ;z = vsrc
+            add     rdi,        64*2  ;diff = diff + 320 (shorts)
+            add     rax,        64    ;Predictor = pred + 320
+
+            ;line 0 1
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi],   xmm0
+            movdqa     XMMWORD PTR [rdi +16],   xmm2
+
+            ;line 2 3
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+16]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi + 32],   xmm0
+            movdqa     XMMWORD PTR [rdi + 48],   xmm2
+
+            ;line 4 5
+            lea        rsi,     [rsi + rdx*4]
+
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax + 32]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi + 64],   xmm0
+            movdqa     XMMWORD PTR [rdi + 80],   xmm2
+
+            ;line 6 7
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+ 48]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     XMMWORD PTR [rdi + 96],   xmm0
+            movdqa     XMMWORD PTR [rdi + 112],  xmm2
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+; 16 bytes of 0x80, 16-byte aligned so it can be used directly as a memory
+; operand of pxor.  XORing a byte vector with it flips each byte's top bit,
+; which lets the signed pcmpgtb compare order the bytes as unsigned values.
+align 16
+t80:
+    times 16 db 0x80
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c

index 7490a8add97d62e5f5afb2f0f1efdd92532a43df..3e5a8abf9e92f747f37959c91446c9b16a3481a8 100644 (file)
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -29,14 +29,14 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                   short *quant_ptr, short *dqcoeff_ptr);
  void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
  {
-    short *scan_mask    = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
+    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
+    short *coeff_ptr   = b->coeff;
+    short *zbin_ptr    = b->zbin;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant;
+    short *qcoeff_ptr  = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *dequant_ptr = d->dequant;
  
      d->eob = vp8_fast_quantize_b_impl_mmx(
                   coeff_ptr,
@@ -94,15 +94,15 @@ int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
                                   short *quant_ptr, short *dqcoeff_ptr);
  void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
  {
-    short *scan_mask    = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
-    short *coeff_ptr  = &b->coeff[0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
+    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
+    short *coeff_ptr   = b->coeff;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant;
+    short *qcoeff_ptr  = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *dequant_ptr = d->dequant;
  
-    d->eob = vp8_fast_quantize_b_impl_ssse2(
+    d->eob = vp8_fast_quantize_b_impl_sse2(
                   coeff_ptr,
                   qcoeff_ptr,
                   dequant_ptr,
@@ -124,15 +124,15 @@ int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
  
  void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
  {
-    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-    short zbin_oq_value = b->zbin_extra;
+    short *zbin_boost_ptr = b->zrun_zbin_boost;
+    short *coeff_ptr      = b->coeff;
+    short *zbin_ptr       = b->zbin;
+    short *round_ptr      = b->round;
+    short *quant_ptr      = b->quant;
+    short *qcoeff_ptr     = d->qcoeff;
+    short *dqcoeff_ptr    = d->dqcoeff;
+    short *dequant_ptr    = d->dequant;
+    short zbin_oq_value   = b->zbin_extra;
  
      d->eob = vp8_regular_quantize_b_impl_sse2(
          coeff_ptr,
@@ -165,6 +165,18 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb)
      return vp8_mbuverror_xmm_impl(s_ptr, d_ptr);
  }
  
+/* Assembly kernel defined in subtract_sse2.asm. */
+void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
+                             short *diff, unsigned char *predictor,
+                             int pitch);
+
+/* Unpacks the source pointer, source stride, residual buffer and predictor
+ * from the block structures and forwards them, together with pitch, to the
+ * assembly kernel.
+ */
+void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
+{
+    /* Address of this block's pixels inside the source plane. */
+    unsigned char *z = *(be->base_src) + be->src;
+    /* Kept as int to match the kernel's prototype exactly. */
+    int src_stride = be->src_stride;
+    short *diff = be->src_diff;
+    unsigned char *predictor = bd->predictor;
+
+    vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch);
+}
+
  #endif
  
  void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
@@ -277,17 +289,17 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
          cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
          cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;
  
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c ;
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ;
  
          cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
          cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;
          cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
-        /* cpi->rtcd.encodemb.sub* not implemented for wmt */
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_sse2;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_sse2;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_sse2;
  
          /*cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
-
          cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
-
      }
  
  #endif
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk

index ecca18a0a461d379918b975a5d3b9516a74538ec..9ab92b33431c7ea933e5de0b9b7faeb2b4809180 100644 (file)
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -112,6 +112,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
  VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
  endif
  
+VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/arm_systemdependent.c
+
  # common (c)
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/filter_arm.c
@@ -119,15 +121,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/loopfilter_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/recon_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra4x4_arm.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/systemdependent.c
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
  
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/filter_c.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/idctllm.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/recon.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/reconintra4x4.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/generic/systemdependent.c
-
  # common (armv6)
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/bilinearfilter_v6$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
@@ -152,16 +147,10 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x8_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem16x16_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/dc_only_idct_add_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/iwalsh_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilter_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterhorizontaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterhorizontaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterverticaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterverticaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterhorizontaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterhorizontaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterverticaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterverticaledge_y_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilter_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon2b_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon4b_neon$(ASM)
  VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/reconb_neon$(ASM)
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c

index a6cb27b9327e1b4ee07ff5e99361afc963400526..8e50b7f1b0d75075e1c808dd7c7e0fca3d711a39 100644 (file)
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -14,6 +14,7 @@
  #include "vpx_version.h"
  #include "onyx_int.h"
  #include "vpx/vp8e.h"
+#include "vp8/encoder/firstpass.h"
  #include "onyx.h"
  #include <stdlib.h>
  #include <string.h>
@@ -184,27 +185,30 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
      RANGE_CHECK(vp8_cfg, token_partitions,   VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
      RANGE_CHECK_HI(vp8_cfg, Sharpness,       7);
      RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
-    RANGE_CHECK(vp8_cfg, arnr_strength,   1, 6);
+    RANGE_CHECK_HI(vp8_cfg, arnr_strength,   6);
      RANGE_CHECK(vp8_cfg, arnr_type,       1, 3);
  
      if (cfg->g_pass == VPX_RC_LAST_PASS)
      {
-        int n_doubles = cfg->rc_twopass_stats_in.sz / sizeof(double);
-        int n_packets = cfg->rc_twopass_stats_in.sz / sizeof(FIRSTPASS_STATS);
-        double frames;
+        int              mb_r = (cfg->g_h + 15) / 16;
+        int              mb_c = (cfg->g_w + 15) / 16;
+        size_t           packet_sz = vp8_firstpass_stats_sz(mb_r * mb_c);
+        int              n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
+        FIRSTPASS_STATS *stats;
  
          if (!cfg->rc_twopass_stats_in.buf)
              ERROR("rc_twopass_stats_in.buf not set.");
  
-        if (cfg->rc_twopass_stats_in.sz % sizeof(FIRSTPASS_STATS))
+        if (cfg->rc_twopass_stats_in.sz % packet_sz)
              ERROR("rc_twopass_stats_in.sz indicates truncated packet.");
  
-        if (cfg->rc_twopass_stats_in.sz < 2 * sizeof(FIRSTPASS_STATS))
+        if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
              ERROR("rc_twopass_stats_in requires at least two packets.");
  
-        frames = ((double *)cfg->rc_twopass_stats_in.buf)[n_doubles - 1];
+        stats = (void*)((char *)cfg->rc_twopass_stats_in.buf
+                + (n_packets - 1) * packet_sz);
  
-        if ((int)(frames + 0.5) != n_packets - 1)
+        if ((int)(stats->count + 0.5) != n_packets - 1)
              ERROR("rc_twopass_stats_in missing EOS stats packet");
      }
  
@@ -759,12 +763,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t  *ctx,
                  {
                      pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
  
-                    // TODO: ideally this timestamp should be as close as
-                    // possible to the prior PTS so that if a decoder uses
-                    // pts to schedule when to do this, we start right after
-                    // last frame was decoded.  Maybe should be set to
-                    // last time stamp. Invisible frames have no duration..
-                    pkt.data.frame.pts --;
+                    // This timestamp should be as close as possible to the
+                    // prior PTS so that if a decoder uses pts to schedule when
+                    // to do this, we start right after last frame was decoded.
+                    // Invisible frames have no duration.
+                    pkt.data.frame.pts = ((cpi->last_time_stamp_seen
+                        * ctx->cfg.g_timebase.den + round)
+                        / ctx->cfg.g_timebase.num / 10000000) + 1;
                      pkt.data.frame.duration = 0;
                  }
  
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk

index 4ce18b6e79494b9476747a9e45c0a007702f254a..98288f2b67b186e22b14c9066b4b341188fc1d3b 100644 (file)
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -82,6 +82,8 @@ VP8_CX_SRCS-yes += encoder/treewriter.c
  VP8_CX_SRCS-yes += encoder/variance_c.c
  VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.h
  VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.c
+VP8_CX_SRCS-yes += encoder/temporal_filter.c
+VP8_CX_SRCS-yes += encoder/temporal_filter.h
  
  ifeq ($(CONFIG_REALTIME_ONLY),yes)
  VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
@@ -104,6 +106,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
  VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
  VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
  VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
  VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
  VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
  VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk

index 1424bd15a2bbda115dbc4ef1973a3fc275d44234..d126faf32fcd98346ed1eaf7518dc164ee543005 100644 (file)
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -13,17 +13,22 @@
  
  #File list for arm
  # encoder
-VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/csystemdependent.c
+VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/arm_csystemdependent.c
  
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/boolhuff_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/mcomp_arm.c
  
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6)  += encoder/generic/csystemdependent.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/boolhuff.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/mcomp.c
+VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)  += encoder/boolhuff.c
+
+#File list for armv5te
+# encoder
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
  
  #File list for armv6
  # encoder
@@ -44,10 +49,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/boolhuff_armv7$(ASM)
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
  
  VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/vpx_vp8_enc_asm_offsets.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk

index ae0610cdac2e82714ec25d746110c2f732379c96..0803a9cb0958169b3aac7f479a1ee0df4267b50a 100644 (file)
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -11,11 +11,9 @@
  
  #VP8_DX_SRCS list is modified according to different platforms.
  
+VP8_DX_SRCS-$(ARCH_ARM)  += decoder/arm/arm_dsystemdependent.c
+
  VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/idct_blk.c
  VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
  
  #File list for armv6
diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h

new file mode 100644 (file)

index 0000000..81af1f1
--- /dev/null
+++ b/vpx_ports/arm.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VPX_PORTS_ARM_H
+#define VPX_PORTS_ARM_H
+#include <stdlib.h>
+#include "config.h"
+
+/*ARMv5TE "Enhanced DSP" instructions.*/
+#define HAS_EDSP  0x01
+/*ARMv6 "Parallel" or "Media" instructions.*/
+#define HAS_MEDIA 0x02
+/*ARMv7 optional NEON instructions.*/
+#define HAS_NEON  0x04
+
+/* Returns the CPU capabilities as a bitmask built from the HAS_* flags
+ * defined above.
+ */
+int arm_cpu_caps(void);
+
+#endif
+
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c

new file mode 100644 (file)

index 0000000..4109924
--- /dev/null
+++ b/vpx_ports/arm_cpudetect.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "arm.h"
+
+/* Reads the VPX_SIMD_CAPS environment variable.
+ * When it is set to a non-empty string, its value (parsed by strtol with
+ * automatic base detection) is stored in *flags and 0 is returned.
+ * Otherwise *flags is set to 0 and -1 is returned.
+ */
+static int arm_cpu_env_flags(int *flags)
+{
+    const char *caps = getenv("VPX_SIMD_CAPS");
+
+    if (caps == NULL || *caps == '\0')
+    {
+        *flags = 0;
+        return -1;
+    }
+
+    *flags = (int)strtol(caps, NULL, 0);
+    return 0;
+}
+
+/* Returns the capability mask from the VPX_SIMD_CAPS_MASK environment
+ * variable (parsed by strtol with automatic base detection), or all bits
+ * set when the variable is unset or empty.
+ */
+static int arm_cpu_env_mask(void)
+{
+    const char *mask_str = getenv("VPX_SIMD_CAPS_MASK");
+
+    if (mask_str != NULL && *mask_str != '\0')
+        return (int)strtol(mask_str, NULL, 0);
+
+    return ~0;
+}
+
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+#define WIN32_LEAN_AND_MEAN
+#define WIN32_EXTRA_LEAN
+#include <windows.h>
+
+/* MSVC implementation of arm_cpu_caps().
+ * If VPX_SIMD_CAPS is set, its value is returned unchanged. Otherwise each
+ * feature allowed by VPX_SIMD_CAPS_MASK is probed by emitting one
+ * representative instruction inside a __try block; an illegal-instruction
+ * exception means the feature is absent. The result is a bitmask of HAS_*
+ * flags, ANDed with the mask.
+ * NOTE(review): other files in this change test HAVE_ARMV7 by value
+ * (#if HAVE_ARMV7); confirm that defined() is the intended test here.
+ */
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* MSVC has no inline __asm support for ARM, but it does let you __emit
+     *  instructions via their assembled hex code.
+     * All of these instructions should be essentially nops.
+     */
+#if defined(HAVE_ARMV5TE)
+    if (mask & HAS_EDSP)
+    {
+        __try
+        {
+            /*PLD [r13]*/
+            __emit(0xF5DDF000);
+            flags |= HAS_EDSP;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#if defined(HAVE_ARMV6)
+    if (mask & HAS_MEDIA)
+    {
+        /* The opening brace above was missing, which left the braces of
+         * this function unbalanced whenever HAVE_ARMV6 was defined.
+         */
+        __try
+        {
+            /*SHADD8 r3,r3,r3*/
+            __emit(0xE6333F93);
+            flags |= HAS_MEDIA;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#if defined(HAVE_ARMV7)
+    if (mask & HAS_NEON)
+    {
+        __try
+        {
+            /*VORR q0,q0,q0*/
+            __emit(0xF2200150);
+            flags |= HAS_NEON;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#endif
+#endif
+    return flags & mask;
+}
+
+#elif defined(__linux__)
+#include <stdio.h>
+
+/* Linux implementation of arm_cpu_caps().
+ * If VPX_SIMD_CAPS is set, its value is returned unchanged. Otherwise
+ * /proc/cpuinfo is scanned line by line and the detected HAS_* flags are
+ * returned ANDed with the VPX_SIMD_CAPS_MASK mask. If /proc/cpuinfo cannot
+ * be opened, no flags are detected.
+ */
+int arm_cpu_caps(void)
+{
+    FILE *fin;
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+     *  on Android.
+     * This also means that detection will fail in Scratchbox.
+     */
+    fin = fopen("/proc/cpuinfo","r");
+    if(fin != NULL)
+    {
+        /* 512 should be enough for anybody (it's even enough for all the flags
+         * that x86 has accumulated... so far).
+         */
+        char buf[512];
+        while (fgets(buf, 511, fin) != NULL)
+        {
+#if defined(HAVE_ARMV5TE) || defined(HAVE_ARMV7)
+            /* The "Features" line lists space-separated feature names. */
+            if (memcmp(buf, "Features", 8) == 0)
+            {
+                char *p;
+#if defined(HAVE_ARMV5TE)
+                /* Accept " edsp" only when followed by a space or newline,
+                 * so a longer name that starts with "edsp" does not match.
+                 */
+                p=strstr(buf, " edsp");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_EDSP;
+                }
+#if defined(HAVE_ARMV7)
+                /* This check is nested inside the HAVE_ARMV5TE block, so it
+                 * is compiled only when both macros are defined.
+                 */
+                p = strstr(buf, " neon");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_NEON;
+                }
+#endif
+#endif
+            }
+#endif
+#if defined(HAVE_ARMV6)
+            /* The number after "CPU architecture:" (17 characters) is the
+             * architecture version; 6 or higher enables HAS_MEDIA.
+             */
+            if (memcmp(buf, "CPU architecture:",17) == 0){
+                int version;
+                version = atoi(buf+17);
+                if (version >= 6)
+                {
+                    flags |= HAS_MEDIA;
+                }
+            }
+#endif
+        }
+        fclose(fin);
+    }
+    return flags & mask;
+}
+
+#elif !CONFIG_RUNTIME_CPU_DETECT
+
+/* Fallback arm_cpu_caps() used when runtime CPU detection is disabled.
+ * If VPX_SIMD_CAPS is set, its value is returned unchanged. Otherwise every
+ * feature whose HAVE_* macro is defined is reported, ANDed with the
+ * VPX_SIMD_CAPS_MASK mask.
+ */
+int arm_cpu_caps(void)
+{
+    int caps;
+    int allowed;
+
+    if (arm_cpu_env_flags(&caps) == 0)
+        return caps;
+
+    allowed = arm_cpu_env_mask();
+#if defined(HAVE_ARMV5TE)
+    caps = caps | HAS_EDSP;
+#endif
+#if defined(HAVE_ARMV6)
+    caps = caps | HAS_MEDIA;
+#endif
+#if defined(HAVE_ARMV7)
+    caps = caps | HAS_NEON;
+#endif
+    return allowed & caps;
+}
+
+#else
+#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+ "available for your platform. Reconfigure without --enable-runtime-cpu-detect."
+#endif
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c

index 1e8bcb89d35b13cf278153e760729880fe0d7ae5..fee76fff78c0acb6539be0989762cbea0d33b535 100644 (file)
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -10,6 +10,7 @@
  
  
  #include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
  #include "vpx_scale/vpxscale.h"
  
  
@@ -47,6 +48,9 @@ extern void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CO
   ****************************************************************************/
  void vp8_scale_machine_specific_config()
  {
+#if HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT
+    int flags;
+#endif
      /*
      vp8_horizontal_line_1_2_scale        = horizontal_line_1_2_scale_armv4;
      vp8_vertical_band_1_2_scale          = vertical_band_1_2_scale_armv4;
@@ -73,14 +77,20 @@ void vp8_scale_machine_specific_config()
      vp8_horizontal_line_5_4_scale         = vp8cx_horizontal_line_5_4_scale_c;
      */
  
-#if HAVE_ARMV7
-    vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders_neon;
-    vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly_neon;
-    vp8_yv12_copy_frame_ptr               = vp8_yv12_copy_frame_neon;
-#else
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
      vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders;
      vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly;
      vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame;
  #endif
-
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    flags = arm_cpu_caps();
+    if (flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon;
+        vp8_yv12_copy_frame_yonly_ptr     = vp8_yv12_copy_frame_yonly_neon;
+        vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame_neon;
+    }
+#endif
  }
diff --git a/vpxdec.c b/vpxdec.c

new file mode 100644 (file)

index 0000000..ee84197
--- /dev/null
+++ b/vpxdec.c
@@ -0,0 +1,1032 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* This is a simple program that reads ivf files and decodes them
+ * using the new interface. Decoded frames are output as YV12 raw.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
+#if defined(_WIN32)
+#include <io.h>
+#define snprintf _snprintf
+#define isatty   _isatty
+#define fileno   _fileno
+#else
+#include <unistd.h>
+#endif
+#define VPX_CODEC_DISABLE_COMPAT 1
+#include "vpx_config.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx_ports/vpx_timer.h"
+#if CONFIG_VP8_DECODER
+#include "vpx/vp8dx.h"
+#endif
+#if CONFIG_MD5
+#include "md5_utils.h"
+#endif
+#include "nestegg/include/nestegg/nestegg.h"
+
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+
+static const char *exec_name;
+
+/* The bytes 'V' (0x56), 'P' (0x50), '8' (0x38), 0 read as a little-endian
+ * 32-bit value.
+ */
+#define VP8_FOURCC (0x00385056)
+/* Table of available decoder interfaces. A container fourcc selects an
+ * entry when (fourcc & fourcc_mask) == fourcc; the 0x00FFFFFF mask ignores
+ * the most significant byte.
+ */
+static const struct
+{
+    char const *name;
+    const vpx_codec_iface_t *iface;
+    unsigned int             fourcc;
+    unsigned int             fourcc_mask;
+} ifaces[] =
+{
+#if CONFIG_VP8_DECODER
+    {"vp8",  &vpx_codec_vp8_dx_algo,   VP8_FOURCC, 0x00FFFFFF},
+#endif
+};
+
+#include "args.h"
+/* Command-line option definitions.
+ * NOTE(review): the ARG_DEF arguments are presumably (short name, long name,
+ * takes-a-value flag, help text) -- confirm against args.h.
+ */
+static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
+                                  "Codec to use");
+static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
+                                  "Output raw YV12 frames");
+static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
+                                  "Output raw I420 frames");
+static const arg_def_t flipuvarg = ARG_DEF(NULL, "flipuv", 0,
+                                   "Flip the chroma planes in the output");
+static const arg_def_t noblitarg = ARG_DEF(NULL, "noblit", 0,
+                                   "Don't process the decoded frames");
+static const arg_def_t progressarg = ARG_DEF(NULL, "progress", 0,
+                                     "Show progress after each frame decodes");
+static const arg_def_t limitarg = ARG_DEF(NULL, "limit", 1,
+                                  "Stop decoding after n frames");
+static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0,
+                                     "Postprocess decoded frames");
+static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
+                                    "Show timing summary");
+static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
+                                    "Output file name pattern (see below)");
+static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
+                                    "Max threads to use");
+static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0,
+                                  "Show version string");
+
+#if CONFIG_MD5
+static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
+                                        "Compute the MD5 sum of the decoded frame");
+#endif
+/* NULL-terminated list of the general options, printed by usage_exit(). */
+static const arg_def_t *all_args[] =
+{
+    &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
+    &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
+    &threadsarg, &verbosearg,
+#if CONFIG_MD5
+    &md5arg,
+#endif
+    NULL
+};
+
+#if CONFIG_VP8_DECODER
+/* VP8 postprocessing option definitions. */
+static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
+                                        "Enable VP8 postproc add noise");
+static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
+                                 "Enable VP8 deblocking");
+static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1,
+        "Enable VP8 demacroblocking, w/ level");
+static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
+                                       "Enable VP8 visible debug info");
+
+
+/* NULL-terminated list of the VP8 postprocessing options, printed by
+ * usage_exit() under "VP8 Postprocessing Options".
+ */
+static const arg_def_t *vp8_pp_args[] =
+{
+    &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
+    NULL
+};
+#endif
+
+/* Prints the usage text, the option tables, the output-pattern help and
+ * the list of included decoders to stderr, then exits with EXIT_FAILURE.
+ */
+static void usage_exit()
+{
+    const size_t n_ifaces = sizeof(ifaces) / sizeof(ifaces[0]);
+    size_t       idx = 0;
+
+    fprintf(stderr, "Usage: %s <options> filename\n\n"
+            "Options:\n", exec_name);
+    arg_show_usage(stderr, all_args);
+#if CONFIG_VP8_DECODER
+    fprintf(stderr, "\nVP8 Postprocessing Options:\n");
+    arg_show_usage(stderr, vp8_pp_args);
+#endif
+    fprintf(stderr,
+            "\nOutput File Patterns:\n\n"
+            "  The -o argument specifies the name of the file(s) to "
+            "write to. If the\n  argument does not include any escape "
+            "characters, the output will be\n  written to a single file. "
+            "Otherwise, the filename will be calculated by\n  expanding "
+            "the following escape characters:\n"
+            "\n\t%%w   - Frame width"
+            "\n\t%%h   - Frame height"
+            "\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
+            "\n\n  Pattern arguments are only supported in conjunction "
+            "with the --yv12 and\n  --i420 options. If the -o option is "
+            "not specified, the output will be\n  directed to stdout.\n"
+            );
+    fprintf(stderr, "\nIncluded decoders:\n\n");
+
+    /* One line per compiled-in decoder interface. */
+    while (idx < n_ifaces)
+    {
+        fprintf(stderr, "    %-6s - %s\n",
+                ifaces[idx].name,
+                vpx_codec_iface_name(ifaces[idx].iface));
+        idx++;
+    }
+
+    exit(EXIT_FAILURE);
+}
+
+/* Prints a printf-style message followed by a newline to stderr, then
+ * calls usage_exit(), which prints the usage text and exits.
+ */
+void die(const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    /* Every va_start must be paired with va_end in the same function. */
+    va_end(ap);
+    fprintf(stderr, "\n");
+    usage_exit();
+}
+
+/* Decodes the two bytes at vmem as a little-endian 16-bit value. */
+static unsigned int mem_get_le16(const void *vmem)
+{
+    const unsigned char *bytes = (const unsigned char *)vmem;
+
+    return (unsigned int)(bytes[0] | (bytes[1] << 8));
+}
+
+/* Decodes the four bytes at vmem as a little-endian 32-bit value.
+ * Each byte is widened to unsigned int before shifting: shifting the
+ * promoted (signed) int left by 24 overflows when the top byte is >= 0x80.
+ */
+static unsigned int mem_get_le32(const void *vmem)
+{
+    unsigned int  val;
+    const unsigned char *mem = (const unsigned char *)vmem;
+
+    val = (unsigned int)mem[3] << 24;
+    val |= (unsigned int)mem[2] << 16;
+    val |= (unsigned int)mem[1] << 8;
+    val |= (unsigned int)mem[0];
+    return val;
+}
+
+/* Input container formats distinguished by read_frame(). */
+enum file_kind
+{
+    RAW_FILE,   /* each frame preceded by a 4-byte size header */
+    IVF_FILE,   /* each frame preceded by a 12-byte IVF frame header */
+    WEBM_FILE   /* frames read through the nestegg demuxer */
+};
+
+/* State of the input stream shared by the file probes and read_frame(). */
+struct input_ctx
+{
+    enum file_kind  kind;        /* detected container format */
+    FILE           *infile;      /* underlying input stream */
+    nestegg        *nestegg_ctx; /* demuxer context, set by file_is_webm() */
+    nestegg_packet *pkt;         /* packet currently being consumed */
+    unsigned int    chunk;       /* index of the next chunk within pkt */
+    unsigned int    chunks;      /* chunk count reported for pkt */
+    unsigned int    video_track; /* track index chosen by file_is_webm() */
+};
+
+/* Sizes of the per-frame headers for IVF and raw input. */
+#define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t))
+#define RAW_FRAME_HDR_SZ (sizeof(uint32_t))
+/* Reads the next compressed frame from input.
+ * For WEBM_FILE, *buf and *buf_sz are set by nestegg_packet_data() for the
+ * current chunk of the current video packet. For IVF and raw input, the
+ * frame is read from the file into *buf, which is reallocated to twice the
+ * frame size (updating *buf_alloc_sz) when the frame does not fit.
+ * Returns 0 when a frame was read and 1 on end of stream or error.
+ */
+static int read_frame(struct input_ctx      *input,
+                      uint8_t               **buf,
+                      size_t                *buf_sz,
+                      size_t                *buf_alloc_sz)
+{
+    char            raw_hdr[IVF_FRAME_HDR_SZ];
+    size_t          new_buf_sz;
+    FILE           *infile = input->infile;
+    enum file_kind  kind = input->kind;
+    if(kind == WEBM_FILE)
+    {
+        /* All chunks of the current packet are consumed: advance to the
+         * next packet that belongs to the video track.
+         */
+        if(input->chunk >= input->chunks)
+        {
+            unsigned int track;
+
+            do
+            {
+                /* End of this packet, get another. */
+                if(input->pkt)
+                    nestegg_free_packet(input->pkt);
+
+                /* NOTE(review): input->pkt is not cleared after the free
+                 * above, so if this read fails it may still hold the freed
+                 * pointer -- confirm callers stop after a failure.
+                 */
+                if(nestegg_read_packet(input->nestegg_ctx, &input->pkt) <= 0
+                   || nestegg_packet_track(input->pkt, &track))
+                    return 1;
+
+            } while(track != input->video_track);
+
+            if(nestegg_packet_count(input->pkt, &input->chunks))
+                return 1;
+            input->chunk = 0;
+        }
+
+        if(nestegg_packet_data(input->pkt, input->chunk, buf, buf_sz))
+            return 1;
+        input->chunk++;
+
+        return 0;
+    }
+    /* For both the raw and ivf formats, the frame size is the first 4 bytes
+     * of the frame header. We just need to special case on the header
+     * size.
+     */
+    else if (fread(raw_hdr, kind==IVF_FILE
+                   ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1, infile) != 1)
+    {
+        /* A short read at end of file is the normal end of the stream. */
+        if (!feof(infile))
+            fprintf(stderr, "Failed to read frame size\n");
+
+        new_buf_sz = 0;
+    }
+    else
+    {
+        new_buf_sz = mem_get_le32(raw_hdr);
+
+        /* Frames larger than 256MB are rejected as invalid. */
+        if (new_buf_sz > 256 * 1024 * 1024)
+        {
+            fprintf(stderr, "Error: Read invalid frame size (%u)\n",
+                    (unsigned int)new_buf_sz);
+            new_buf_sz = 0;
+        }
+
+        /* For raw input, a size above 256KB only produces a warning. */
+        if (kind == RAW_FILE && new_buf_sz > 256 * 1024)
+            fprintf(stderr, "Warning: Read invalid frame size (%u)"
+                    " - not a raw file?\n", (unsigned int)new_buf_sz);
+
+        if (new_buf_sz > *buf_alloc_sz)
+        {
+            /* Grow to twice the needed size; *buf is only replaced when
+             * the reallocation succeeds.
+             */
+            uint8_t *new_buf = realloc(*buf, 2 * new_buf_sz);
+
+            if (new_buf)
+            {
+                *buf = new_buf;
+                *buf_alloc_sz = 2 * new_buf_sz;
+            }
+            else
+            {
+                fprintf(stderr, "Failed to allocate compressed data buffer\n");
+                new_buf_sz = 0;
+            }
+        }
+    }
+
+    *buf_sz = new_buf_sz;
+
+    /* A size of zero means end of stream or one of the errors above. */
+    if (*buf_sz)
+    {
+        if (fread(*buf, 1, *buf_sz, infile) != *buf_sz)
+        {
+            fprintf(stderr, "Failed to read full frame\n");
+            return 1;
+        }
+
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Opens the output sink.
+ * With do_md5 set (and CONFIG_MD5 enabled) the sink is a freshly allocated,
+ * initialised MD5Context; otherwise it is stdout when out_fn is "-", or the
+ * named file opened for binary writing. Exits with EXIT_FAILURE when the
+ * file cannot be opened or the MD5 context cannot be allocated.
+ * Returns the sink as an opaque pointer for out_put() and out_close().
+ */
+void *out_open(const char *out_fn, int do_md5)
+{
+    void *out = NULL;
+
+    if (do_md5)
+    {
+#if CONFIG_MD5
+        MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
+        (void)out_fn;
+
+        /* Do not hand a NULL context to MD5Init. */
+        if (!md5_ctx)
+        {
+            fprintf(stderr, "Failed to allocate MD5 context\n");
+            exit(EXIT_FAILURE);
+        }
+
+        MD5Init(md5_ctx);
+#endif
+    }
+    else
+    {
+        FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout;
+
+        if (!outfile)
+        {
+            fprintf(stderr, "Failed to output file");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    return out;
+}
+
+/* Sends len bytes from buf to the sink returned by out_open(): written to
+ * the FILE when do_md5 is zero, otherwise fed to the MD5 context (when
+ * CONFIG_MD5 is enabled).
+ */
+void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
+{
+    if (!do_md5)
+    {
+        fwrite(buf, 1, len, out);
+        return;
+    }
+
+#if CONFIG_MD5
+    MD5Update(out, buf, len);
+#endif
+}
+
+/* Closes the sink returned by out_open(). With do_md5 zero the FILE is
+ * closed; otherwise (when CONFIG_MD5 is enabled) the digest is finalised,
+ * the context is freed, and the 16 digest bytes are printed in hex to
+ * stdout followed by two spaces and out_fn.
+ */
+void out_close(void *out, const char *out_fn, int do_md5)
+{
+    if (!do_md5)
+    {
+        fclose(out);
+        return;
+    }
+
+#if CONFIG_MD5
+    {
+        uint8_t digest[16];
+        int     pos = 0;
+
+        MD5Final(digest, out);
+        free(out);
+
+        while (pos < 16)
+            printf("%02x", digest[pos++]);
+
+        printf("  %s\n", out_fn);
+    }
+#endif
+}
+
+/* Checks whether infile starts with a 32-byte IVF header ("DKIF").
+ * On a match, fills in fourcc, width, height and a frame-rate guess, leaves
+ * the stream positioned after the header and returns 1. Otherwise rewinds
+ * the stream and returns 0.
+ */
+unsigned int file_is_ivf(FILE *infile,
+                         unsigned int *fourcc,
+                         unsigned int *width,
+                         unsigned int *height,
+                         unsigned int *fps_den,
+                         unsigned int *fps_num)
+{
+    char hdr[32];
+    int  signature_ok;
+
+    signature_ok = fread(hdr, 1, 32, infile) == 32
+                   && hdr[0] == 'D' && hdr[1] == 'K'
+                   && hdr[2] == 'I' && hdr[3] == 'F';
+
+    if (!signature_ok)
+    {
+        rewind(infile);
+        return 0;
+    }
+
+    /* An unknown version is reported but still treated as IVF. */
+    if (mem_get_le16(hdr + 4) != 0)
+        fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
+                " decode properly.");
+
+    *fourcc = mem_get_le32(hdr + 8);
+    *width = mem_get_le16(hdr + 12);
+    *height = mem_get_le16(hdr + 14);
+    *fps_num = mem_get_le32(hdr + 16);
+    *fps_den = mem_get_le32(hdr + 20);
+
+    /* Some versions of vpxenc used 1/(2*fps) for the timebase, so the
+     * framerate can be guessed from the timebase alone in that case.
+     * Other files would need readahead, as is done for webm.
+     */
+    if (*fps_num >= 1000)
+    {
+        /* FPS is not known for sure and there is no readahead code, so
+         * default to 30fps.
+         */
+        *fps_num = 30;
+        *fps_den = 1;
+    }
+    else if (*fps_num & 1)
+    {
+        /* Undo the encoder's factor of 2 by doubling the denominator. */
+        *fps_den <<= 1;
+    }
+    else
+    {
+        /* Undo the encoder's factor of 2 by halving the numerator. */
+        *fps_num >>= 1;
+    }
+
+    return 1;
+}
+
+
+/* Checks whether infile looks like a raw stream of size-prefixed frames.
+ * Reads the first 32 bytes: the first 4 bytes (little-endian) must be below
+ * 256MB, and one of the known decoder interfaces must be able to peek
+ * stream info from the remaining 28 bytes. On a match, fills in fourcc,
+ * width, height and a default frame rate of 30/1.
+ * The stream is always rewound. Returns 1 on a match, 0 otherwise.
+ */
+unsigned int file_is_raw(FILE *infile,
+                         unsigned int *fourcc,
+                         unsigned int *width,
+                         unsigned int *height,
+                         unsigned int *fps_den,
+                         unsigned int *fps_num)
+{
+    unsigned char buf[32];
+    int is_raw = 0;
+    vpx_codec_stream_info_t si;
+
+    if (fread(buf, 1, 32, infile) == 32)
+    {
+        int i;
+
+        if(mem_get_le32(buf) < 256 * 1024 * 1024)
+            for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+                /* A zero return is treated as "stream recognised". */
+                if(!vpx_codec_peek_stream_info(ifaces[i].iface,
+                                               buf + 4, 32 - 4, &si))
+                {
+                    is_raw = 1;
+                    *fourcc = ifaces[i].fourcc;
+                    *width = si.w;
+                    *height = si.h;
+                    *fps_num = 30;
+                    *fps_den = 1;
+                    break;
+                }
+    }
+
+    rewind(infile);
+    return is_raw;
+}
+
+
+/* nestegg read callback: reads length bytes from the FILE passed as
+ * userdata into buffer. Returns -1 on a stream error, 0 when end of file
+ * has been reached, and 1 otherwise.
+ */
+static int
+nestegg_read_cb(void *buffer, size_t length, void *userdata)
+{
+    FILE *stream = userdata;
+
+    fread(buffer, 1, length, stream);
+
+    if (ferror(stream))
+        return -1;
+
+    return feof(stream) ? 0 : 1;
+}
+
+
+/* nestegg seek callback: maps the NESTEGG_SEEK_* origin to the matching
+ * stdio SEEK_* value (any other value is passed through unchanged) and
+ * seeks the FILE passed as userdata. Returns 0 on success, -1 on failure.
+ */
+static int
+nestegg_seek_cb(int64_t offset, int whence, void * userdata)
+{
+    int origin = whence;
+
+    if (whence == NESTEGG_SEEK_SET)
+        origin = SEEK_SET;
+    else if (whence == NESTEGG_SEEK_CUR)
+        origin = SEEK_CUR;
+    else if (whence == NESTEGG_SEEK_END)
+        origin = SEEK_END;
+
+    if (fseek(userdata, offset, origin))
+        return -1;
+
+    return 0;
+}
+
+
+/* nestegg tell callback: returns the current position, as reported by
+ * ftell(), of the FILE passed as userdata.
+ */
+static int64_t
+nestegg_tell_cb(void * userdata)
+{
+    FILE *stream = userdata;
+    long  pos = ftell(stream);
+
+    return pos;
+}
+
+
+/* nestegg log callback: prints the printf-style message followed by a
+ * newline to stderr. The context and severity arguments are not used.
+ */
+static void
+nestegg_log_cb(nestegg * context, unsigned int severity, char const * format,
+               ...)
+{
+    va_list ap;
+
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+}
+
+
+/* Estimates the frame rate of the selected video track by reading packets
+ * from the start of the stream, then seeks the track back to timestamp 0.
+ * The rate is (video packets read - 1) over the last packet timestamp.
+ * When fewer than two video packets were read, or the timestamp is too
+ * small to give a non-zero denominator, the rate defaults to 30/1, the same
+ * default the IVF and raw probes use.
+ * Returns 0 on success. If the seek back fails, clears input->nestegg_ctx,
+ * rewinds the file and returns 1.
+ */
+static int
+webm_guess_framerate(struct input_ctx *input,
+                     unsigned int     *fps_den,
+                     unsigned int     *fps_num)
+{
+    unsigned int i;
+    uint64_t     tstamp=0;
+
+    /* Guess the framerate. Read up to 1 second, or 50 video packets,
+     * whichever comes first.
+     */
+    for(i=0; tstamp < 1000000000 && i < 50;)
+    {
+        nestegg_packet * pkt;
+        unsigned int track;
+
+        if(nestegg_read_packet(input->nestegg_ctx, &pkt) <= 0)
+            break;
+
+        /* Only trust 'track' when the lookup succeeded (returned 0);
+         * otherwise it would be read uninitialised.
+         */
+        if(!nestegg_packet_track(pkt, &track)
+           && track == input->video_track)
+        {
+            nestegg_packet_tstamp(pkt, &tstamp);
+            i++;
+        }
+
+        nestegg_free_packet(pkt);
+    }
+
+    if(nestegg_track_seek(input->nestegg_ctx, input->video_track, 0))
+        goto fail;
+
+    if(i < 2 || tstamp / 1000 == 0)
+    {
+        /* Not enough data for an estimate: (i - 1) would wrap around for
+         * i == 0 and the denominator would be 0 for tiny timestamps.
+         */
+        *fps_num = 30;
+        *fps_den = 1;
+    }
+    else
+    {
+        *fps_num = (i - 1) * 1000000;
+        *fps_den = tstamp / 1000;
+    }
+    return 0;
+fail:
+    /* NOTE(review): the context is dropped without being destroyed here --
+     * confirm whether it should be released first.
+     */
+    input->nestegg_ctx = NULL;
+    rewind(input->infile);
+    return 1;
+}
+
+
+/* Checks whether the input can be opened by the nestegg demuxer and
+ * selects its first video track.
+ * On success, stores the track index in input->video_track, sets fourcc to
+ * VP8_FOURCC, fills width and height from the track's video parameters,
+ * sets fps_den and fps_num to 0 and returns 1.
+ * On failure, clears input->nestegg_ctx, rewinds the file and returns 0.
+ * Exits the program when the selected track is not VP8.
+ */
+static int
+file_is_webm(struct input_ctx *input,
+             unsigned int     *fourcc,
+             unsigned int     *width,
+             unsigned int     *height,
+             unsigned int     *fps_den,
+             unsigned int     *fps_num)
+{
+    unsigned int i, n;
+    int          track_type = -1;
+
+    nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb,
+                     input->infile};
+    nestegg_video_params params;
+
+    if(nestegg_init(&input->nestegg_ctx, io, NULL))
+        goto fail;
+
+    if(nestegg_track_count(input->nestegg_ctx, &n))
+        goto fail;
+
+    /* Stop at the first video track; a negative track type is an error. */
+    for(i=0; i<n; i++)
+    {
+        track_type = nestegg_track_type(input->nestegg_ctx, i);
+
+        if(track_type == NESTEGG_TRACK_VIDEO)
+            break;
+        else if(track_type < 0)
+            goto fail;
+    }
+
+    /* NOTE(review): when no video track exists the loop ends with i == n,
+     * and that index is still passed to nestegg_track_codec_id() -- confirm
+     * this is the intended way to reject such files.
+     */
+    if(nestegg_track_codec_id(input->nestegg_ctx, i) != NESTEGG_CODEC_VP8)
+    {
+        fprintf(stderr, "Not VP8 video, quitting.\n");
+        exit(1);
+    }
+
+    input->video_track = i;
+
+    if(nestegg_track_video_params(input->nestegg_ctx, i, &params))
+        goto fail;
+
+    /* The frame rate is left at 0 here. */
+    *fps_den = 0;
+    *fps_num = 0;
+    *fourcc = VP8_FOURCC;
+    *width = params.width;
+    *height = params.height;
+    return 1;
+fail:
+    input->nestegg_ctx = NULL;
+    rewind(input->infile);
+    return 0;
+}
+
+
+/* Prints a one-line progress report to stderr, ending in a carriage return
+ * so the next report overwrites it: decoded frame count, shown frame count,
+ * elapsed decode time in microseconds, and the resulting frames per second.
+ */
+void show_progress(int frame_in, int frame_out, unsigned long dx_time)
+{
+    const double fps = (float)frame_out * 1000000.0 / (float)dx_time;
+
+    fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\r",
+            frame_in, frame_out, dx_time, fps);
+}
+
+
+/* Expands an output filename pattern into out, a buffer of q_len bytes.
+ * Escapes: %w is replaced by d_w, %h by d_h, and %1..%9 by frame_in
+ * zero-padded to that many digits (%1 applies no padding). All other text
+ * is copied unchanged.
+ * Calls die() on an unrecognised escape or when a copied segment does not
+ * fit in the remaining space.
+ * The numeric arguments are unsigned, so they are formatted with %u.
+ */
+void generate_filename(const char *pattern, char *out, size_t q_len,
+                       unsigned int d_w, unsigned int d_h,
+                       unsigned int frame_in)
+{
+    const char *p = pattern;
+    char *q = out;
+
+    do
+    {
+        const char *next_pat = strchr(p, '%');
+
+        if(p == next_pat)
+        {
+            size_t pat_len;
+
+            // parse the pattern
+            // terminate the buffer in case snprintf itself does not
+            q[q_len - 1] = '\0';
+            switch(p[1])
+            {
+            case 'w': snprintf(q, q_len - 1, "%u", d_w); break;
+            case 'h': snprintf(q, q_len - 1, "%u", d_h); break;
+            case '1': snprintf(q, q_len - 1, "%u", frame_in); break;
+            case '2': snprintf(q, q_len - 1, "%02u", frame_in); break;
+            case '3': snprintf(q, q_len - 1, "%03u", frame_in); break;
+            case '4': snprintf(q, q_len - 1, "%04u", frame_in); break;
+            case '5': snprintf(q, q_len - 1, "%05u", frame_in); break;
+            case '6': snprintf(q, q_len - 1, "%06u", frame_in); break;
+            case '7': snprintf(q, q_len - 1, "%07u", frame_in); break;
+            case '8': snprintf(q, q_len - 1, "%08u", frame_in); break;
+            case '9': snprintf(q, q_len - 1, "%09u", frame_in); break;
+            default:
+                die("Unrecognized pattern %%%c\n", p[1]);
+            }
+
+            // advance past the expanded text and the two-character escape
+            pat_len = strlen(q);
+            if(pat_len >= q_len - 1)
+                die("Output filename too long.\n");
+            q += pat_len;
+            p += 2;
+            q_len -= pat_len;
+        }
+        else
+        {
+            size_t copy_len;
+
+            // copy the next segment
+            if(!next_pat)
+                copy_len = strlen(p);
+            else
+                copy_len = next_pat - p;
+
+            if(copy_len >= q_len - 1)
+                die("Output filename too long.\n");
+
+            memcpy(q, p, copy_len);
+            q[copy_len] = '\0';
+            q += copy_len;
+            p += copy_len;
+            q_len -= copy_len;
+        }
+    } while(*p);
+}
+
+
+int main(int argc, const char **argv_)
+{
+    vpx_codec_ctx_t          decoder;
+    char                  *fn = NULL;
+    int                    i;
+    uint8_t               *buf = NULL;
+    size_t                 buf_sz = 0, buf_alloc_sz = 0;
+    FILE                  *infile;
+    int                    frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
+    int                    stop_after = 0, postproc = 0, summary = 0, quiet = 1;
+    vpx_codec_iface_t       *iface = NULL;
+    unsigned int           fourcc;
+    unsigned long          dx_time = 0;
+    struct arg               arg;
+    char                   **argv, **argi, **argj;
+    const char             *outfile_pattern = 0;
+    char                    outfile[PATH_MAX];
+    int                     single_file;
+    int                     use_y4m = 1;
+    unsigned int            width;
+    unsigned int            height;
+    unsigned int            fps_den;
+    unsigned int            fps_num;
+    void                   *out = NULL;
+    vpx_codec_dec_cfg_t     cfg = {0};
+#if CONFIG_VP8_DECODER
+    vp8_postproc_cfg_t      vp8_pp_cfg = {0};
+#endif
+    struct input_ctx        input = {0};
+
+    /* Parse command line */
+    exec_name = argv_[0];
+    argv = argv_dup(argc - 1, argv_ + 1);
+
+    for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
+    {
+        memset(&arg, 0, sizeof(arg));
+        arg.argv_step = 1;
+
+        if (arg_match(&arg, &codecarg, argi))
+        {
+            int j, k = -1;
+
+            for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
+                if (!strcmp(ifaces[j].name, arg.val))
+                    k = j;
+
+            if (k >= 0)
+                iface = ifaces[k].iface;
+            else
+                die("Error: Unrecognized argument (%s) to --codec\n",
+                    arg.val);
+        }
+        else if (arg_match(&arg, &outputfile, argi))
+            outfile_pattern = arg.val;
+        else if (arg_match(&arg, &use_yv12, argi))
+        {
+            use_y4m = 0;
+            flipuv = 1;
+        }
+        else if (arg_match(&arg, &use_i420, argi))
+        {
+            use_y4m = 0;
+            flipuv = 0;
+        }
+        else if (arg_match(&arg, &flipuvarg, argi))
+            flipuv = 1;
+        else if (arg_match(&arg, &noblitarg, argi))
+            noblit = 1;
+        else if (arg_match(&arg, &progressarg, argi))
+            progress = 1;
+        else if (arg_match(&arg, &limitarg, argi))
+            stop_after = arg_parse_uint(&arg);
+        else if (arg_match(&arg, &postprocarg, argi))
+            postproc = 1;
+        else if (arg_match(&arg, &md5arg, argi))
+            do_md5 = 1;
+        else if (arg_match(&arg, &summaryarg, argi))
+            summary = 1;
+        else if (arg_match(&arg, &threadsarg, argi))
+            cfg.threads = arg_parse_uint(&arg);
+        else if (arg_match(&arg, &verbosearg, argi))
+            quiet = 0;
+
+#if CONFIG_VP8_DECODER
+        else if (arg_match(&arg, &addnoise_level, argi))
+        {
+            postproc = 1;
+            vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
+            vp8_pp_cfg.noise_level = arg_parse_uint(&arg);
+        }
+        else if (arg_match(&arg, &demacroblock_level, argi))
+        {
+            postproc = 1;
+            vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK;
+            vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg);
+        }
+        else if (arg_match(&arg, &deblock, argi))
+        {
+            postproc = 1;
+            vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
+        }
+        else if (arg_match(&arg, &pp_debug_info, argi))
+        {
+            unsigned int level = arg_parse_uint(&arg);
+
+            postproc = 1;
+            vp8_pp_cfg.post_proc_flag &= ~0x7;
+
+            if (level)
+                vp8_pp_cfg.post_proc_flag |= 8 << (level - 1);
+        }
+
+#endif
+        else
+            argj++;
+    }
+
+    /* Check for unrecognized options */
+    for (argi = argv; *argi; argi++)
+        if (argi[0][0] == '-' && strlen(argi[0]) > 1)
+            die("Error: Unrecognized option %s\n", *argi);
+
+    /* Handle non-option arguments */
+    fn = argv[0];
+
+    if (!fn)
+        usage_exit();
+
+    /* Open file */
+    infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
+
+    if (!infile)
+    {
+        fprintf(stderr, "Failed to open file '%s'",
+                strcmp(fn, "-") ? fn : "stdin");
+        return EXIT_FAILURE;
+    }
+
+    /* Make sure we don't dump to the terminal, unless forced to with -o - */
+    if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5)
+    {
+        fprintf(stderr,
+                "Not dumping raw video to your terminal. Use '-o -' to "
+                "override.\n");
+        return EXIT_FAILURE;
+    }
+
+    input.infile = infile;
+    if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
+                   &fps_num))
+        input.kind = IVF_FILE;
+    else if(file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num))
+        input.kind = WEBM_FILE;
+    else if(file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num))
+        input.kind = RAW_FILE;
+    else
+    {
+        fprintf(stderr, "Unrecognized input file type.\n");
+        return EXIT_FAILURE;
+    }
+
+    /* If the output file is not set or doesn't have a sequence number in
+     * it, then we only open it once.
+     */
+    outfile_pattern = outfile_pattern ? outfile_pattern : "-";
+    single_file = 1;
+    {
+        const char *p = outfile_pattern;
+        do
+        {
+            p = strchr(p, '%');
+            if(p && p[1] >= '1' && p[1] <= '9')
+            {
+                // pattern contains sequence number, so it's not unique.
+                single_file = 0;
+                break;
+            }
+            if(p)
+                p++;
+        } while(p);
+    }
+
+    if(single_file && !noblit)
+    {
+        generate_filename(outfile_pattern, outfile, sizeof(outfile)-1,
+                          width, height, 0);
+        out = out_open(outfile, do_md5);
+    }
+
+    if (use_y4m && !noblit)
+    {
+        char buffer[128];
+        if (!single_file)
+        {
+            fprintf(stderr, "YUV4MPEG2 not supported with output patterns,"
+                            " try --i420 or --yv12.\n");
+            return EXIT_FAILURE;
+        }
+
+        if(input.kind == WEBM_FILE)
+            webm_guess_framerate(&input, &fps_den, &fps_num);
+
+        /*Note: We can't output an aspect ratio here because IVF doesn't
+           store one, and neither does VP8.
+          That will have to wait until these tools support WebM natively.*/
+        sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
+                "420jpeg", width, height, fps_num, fps_den, 'p');
+        out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
+    }
+
+    /* Try to determine the codec from the fourcc. */
+    for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+        if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
+        {
+            vpx_codec_iface_t  *ivf_iface = ifaces[i].iface;
+
+            if (iface && iface != ivf_iface)
+                fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
+                        ifaces[i].name);
+            else
+                iface = ivf_iface;
+
+            break;
+        }
+
+    if (vpx_codec_dec_init(&decoder, iface ? iface :  ifaces[0].iface, &cfg,
+                           postproc ? VPX_CODEC_USE_POSTPROC : 0))
+    {
+        fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
+        return EXIT_FAILURE;
+    }
+
+    if (!quiet)
+        fprintf(stderr, "%s\n", decoder.name);
+
+#if CONFIG_VP8_DECODER
+
+    if (vp8_pp_cfg.post_proc_flag
+        && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg))
+    {
+        fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
+        return EXIT_FAILURE;
+    }
+
+#endif
+
+    /* Decode file */
+    while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz))
+    {
+        vpx_codec_iter_t  iter = NULL;
+        vpx_image_t    *img;
+        struct vpx_usec_timer timer;
+
+        vpx_usec_timer_start(&timer);
+
+        if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0))
+        {
+            const char *detail = vpx_codec_error_detail(&decoder);
+            fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
+
+            if (detail)
+                fprintf(stderr, "  Additional information: %s\n", detail);
+
+            goto fail;
+        }
+
+        vpx_usec_timer_mark(&timer);
+        dx_time += vpx_usec_timer_elapsed(&timer);
+
+        ++frame_in;
+
+        if ((img = vpx_codec_get_frame(&decoder, &iter)))
+            ++frame_out;
+
+        if (progress)
+            show_progress(frame_in, frame_out, dx_time);
+
+        if (!noblit)
+        {
+            if (img)
+            {
+                unsigned int y;
+                char out_fn[PATH_MAX];
+                uint8_t *buf;
+
+                if (!single_file)
+                {
+                    size_t len = sizeof(out_fn)-1;
+
+                    out_fn[len] = '\0';
+                    generate_filename(outfile_pattern, out_fn, len-1,
+                                      img->d_w, img->d_h, frame_in);
+                    out = out_open(out_fn, do_md5);
+                }
+                else if(use_y4m)
+                    out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
+
+                buf = img->planes[VPX_PLANE_Y];
+
+                for (y = 0; y < img->d_h; y++)
+                {
+                    out_put(out, buf, img->d_w, do_md5);
+                    buf += img->stride[VPX_PLANE_Y];
+                }
+
+                buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U];
+
+                for (y = 0; y < (1 + img->d_h) / 2; y++)
+                {
+                    out_put(out, buf, (1 + img->d_w) / 2, do_md5);
+                    buf += img->stride[VPX_PLANE_U];
+                }
+
+                buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V];
+
+                for (y = 0; y < (1 + img->d_h) / 2; y++)
+                {
+                    out_put(out, buf, (1 + img->d_w) / 2, do_md5);
+                    buf += img->stride[VPX_PLANE_V];
+                }
+
+                if (!single_file)
+                    out_close(out, out_fn, do_md5);
+            }
+        }
+
+        if (stop_after && frame_in >= stop_after)
+            break;
+    }
+
+    if (summary || progress)
+    {
+        show_progress(frame_in, frame_out, dx_time);
+        fprintf(stderr, "\n");
+    }
+
+fail:
+
+    if (vpx_codec_destroy(&decoder))
+    {
+        fprintf(stderr, "Failed to destroy decoder: %s\n", vpx_codec_error(&decoder));
+        return EXIT_FAILURE;
+    }
+
+    if (single_file && !noblit)
+        out_close(out, outfile, do_md5);
+
+    if(input.nestegg_ctx)
+        nestegg_destroy(input.nestegg_ctx);
+    if(input.kind != WEBM_FILE)
+        free(buf);
+    fclose(infile);
+    free(argv);
+
+    return EXIT_SUCCESS;
+}
diff --git a/ivfenc.c b/vpxenc.c

similarity index 72%

rename from ivfenc.c

rename to vpxenc.c

index 3fea5b5ccc9e5b6eefa7359b5cf8664dc8540900..a17878217d3daddda67333dda0ac720e12eff646 100644 (file)
--- a/ivfenc.c
+++ b/vpxenc.c
@@ -22,6 +22,7 @@
  #include <stdlib.h>
  #include <stdarg.h>
  #include <string.h>
+#include <limits.h>
  #include "vpx/vpx_encoder.h"
  #if USE_POSIX_MMAP
  #include <sys/types.h>
@@ -30,10 +31,31 @@
  #include <fcntl.h>
  #include <unistd.h>
  #endif
+#include "vpx_version.h"
  #include "vpx/vp8cx.h"
  #include "vpx_ports/mem_ops.h"
  #include "vpx_ports/vpx_timer.h"
  #include "y4minput.h"
+#include "libmkv/EbmlWriter.h"
+#include "libmkv/EbmlIDs.h"
+
+/* Need special handling of these functions on Windows */
+#if defined(_MSC_VER)
+/* MSVS doesn't define off_t, and uses _f{seek,tell}i64 */
+typedef __int64 off_t;
+#define fseeko _fseeki64
+#define ftello _ftelli64
+#elif defined(_WIN32)
+/* MinGW defines off_t, and uses f{seek,tell}o64 */
+#define fseeko fseeko64
+#define ftello ftello64
+#endif
+
+#if defined(_MSC_VER)
+#define LITERALU64(n) n
+#else
+#define LITERALU64(n) n##LLU
+#endif
  
  static const char *exec_name;
  
@@ -395,8 +417,331 @@ static void write_ivf_frame_header(FILE *outfile,
      fwrite(header, 1, 12, outfile);
  }
  
+
+typedef off_t EbmlLoc; /* File offset of an element's size field, as recorded by Ebml_StartSubElement(). */
+
+
+struct cue_entry /* One seek cue; write_webm_block() appends one per cluster opened on a keyframe. */
+{
+    unsigned int time; /* Cluster timecode (derived from the PTS in ms) at the cue. */
+    uint64_t     loc;  /* Absolute file offset of the cluster (glob->cluster_pos). */
+};
+
+
+struct EbmlGlobal /* State of the WebM writer for the output file. */
+{
+    FILE    *stream; /* Output file; element sizes are patched in place with fseeko(). */
+    uint64_t last_pts_ms; /* PTS (ms) of the most recently written block. */
+    vpx_rational_t  framerate; /* Copy of the fps handed to write_webm_file_header(). */
+
+    /* These pointers are to the start of an element */
+    off_t    position_reference; /* Offset just after the Segment's size field; seek and cue positions are stored relative to it. */
+    off_t    seek_info_pos; /* Where the SeekHead was first written; 0 until then. */
+    off_t    segment_info_pos;
+    off_t    track_pos;
+    off_t    cue_pos;
+    off_t    cluster_pos; /* Start of the cluster currently being written. */
+
+    /* These pointers are to the size field of the element */
+    EbmlLoc  startSegment;
+    EbmlLoc  startCluster;
+
+    uint32_t cluster_timecode; /* PTS (ms) at which the current cluster was opened. */
+    int      cluster_open; /* Set to 1 once the first cluster has been started. */
+
+    struct cue_entry *cue_list; /* realloc()-grown array holding `cues` entries. */
+    unsigned int      cues; /* Number of valid entries in cue_list. */
+
+};
+
+
+void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{ /* Write len bytes from buffer_in to glob->stream, in memory order. */
+    fwrite(buffer_in, 1, len, glob->stream); /* NOTE(review): the fwrite() result is dropped, so short writes go unnoticed. */
+}
+
+
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{ /* Write the len bytes at buffer_in to the stream in reverse memory order (last byte first). */
+    const unsigned char *q = (const unsigned char *)buffer_in + len - 1; /* start at the last byte */
+
+    for(; len; len--) /* NOTE(review): yields big-endian output only if the host stores the value little-endian -- confirm for other targets. */
+        Ebml_Write(glob, q--, 1);
+}
+
+
+static void
+Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
+                          unsigned long class_id)
+{ /* Open element class_id: write its ID, save the offset of its size field in *ebmlLoc, then write a placeholder size. */
+    //TODO: the size field always takes 8 bytes; this may need later optimization
+    //this is a key that says the length is unknown
+    unsigned long long unknownLen =  LITERALU64(0x01FFFFFFFFFFFFFF);
+
+    Ebml_WriteID(glob, class_id);
+    *ebmlLoc = ftello(glob->stream); /* offset of the size field, later patched by Ebml_EndSubElement() */
+    Ebml_Serialize(glob, &unknownLen, 8);
+}
+
+static void
+Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
+{ /* Close the element opened at *ebmlLoc by overwriting its placeholder size with the real payload size. */
+    off_t pos;
+    uint64_t size;
+
+    /* Save the current stream pointer */
+    pos = ftello(glob->stream);
+
+    /* Calculate the size of this element */
+    size = pos - *ebmlLoc - 8; /* exclude the 8-byte size field itself */
+    size |=  LITERALU64(0x0100000000000000); /* leading 0x01 byte: length marker for an 8-byte EBML size */
+
+    /* Seek back to the beginning of the element and write the new size */
+    fseeko(glob->stream, *ebmlLoc, SEEK_SET);
+    Ebml_Serialize(glob, &size, 8);
+
+    /* Reset the stream pointer */
+    fseeko(glob->stream, pos, SEEK_SET);
+}
+
+
+static void
+write_webm_seek_element(EbmlGlobal *ebml, unsigned long id, off_t pos)
+{ /* Write one Seek entry that maps element ID `id` to the file offset pos. */
+    uint64_t offset = pos - ebml->position_reference; /* stored relative to position_reference, not as an absolute offset */
+    EbmlLoc start;
+    Ebml_StartSubElement(ebml, &start, Seek);
+    Ebml_SerializeBinary(ebml, SeekID, id);
+    Ebml_SerializeUnsigned64(ebml, SeekPosition, offset);
+    Ebml_EndSubElement(ebml, &start);
+}
+
+
+static void
+write_webm_seek_info(EbmlGlobal *ebml)
+{ /* Write the SeekHead and segment Info elements; on a repeat call, overwrite the earlier copy in place. */
+
+    off_t pos; /* NOTE: the stream is not moved back to pos afterwards; write_webm_file_footer() seeks to the end itself. */
+
+    /* Save the current stream pointer */
+    pos = ftello(ebml->stream);
+
+    if(ebml->seek_info_pos) /* non-zero: a copy was already written, go back to it */
+        fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET);
+    else /* first call: remember where the seek info starts */
+        ebml->seek_info_pos = pos;
+
+    {
+        EbmlLoc start;
+
+        Ebml_StartSubElement(ebml, &start, SeekHead);
+        write_webm_seek_element(ebml, Tracks, ebml->track_pos);
+        write_webm_seek_element(ebml, Cues,   ebml->cue_pos);
+        write_webm_seek_element(ebml, Info,   ebml->segment_info_pos);
+        Ebml_EndSubElement(ebml, &start);
+    }
+    {
+        //segment info
+        EbmlLoc startInfo;
+        uint64_t frame_time;
+
+        frame_time = (uint64_t)1000 * ebml->framerate.den
+                     / ebml->framerate.num; /* one frame in ms; NOTE(review): assumes framerate.num != 0 -- confirm the caller validates it */
+        ebml->segment_info_pos = ftello(ebml->stream);
+        Ebml_StartSubElement(ebml, &startInfo, Info);
+        Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
+        Ebml_SerializeFloat(ebml, Segment_Duration,
+                            ebml->last_pts_ms + frame_time); /* last PTS plus one frame */
+        Ebml_SerializeString(ebml, 0x4D80, "vpxenc" VERSION_STRING); /* presumably the MuxingApp ID -- confirm against EbmlIDs.h */
+        Ebml_SerializeString(ebml, 0x5741, "vpxenc" VERSION_STRING); /* presumably the WritingApp ID -- confirm against EbmlIDs.h */
+        Ebml_EndSubElement(ebml, &startInfo);
+    }
+}
+
+
+static void
+write_webm_file_header(EbmlGlobal                *glob, /* writer state; glob->stream must already be set */
+                       const vpx_codec_enc_cfg_t *cfg,  /* g_w and g_h supply the pixel dimensions */
+                       const struct vpx_rational *fps)  /* frame rate; copied into glob->framerate */
+{ /* Write the EBML header, open the Segment, then write the seek info and the single video track entry. */
+    {
+        EbmlLoc start;
+        Ebml_StartSubElement(glob, &start, EBML);
+        Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
+        Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version
+        Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length
+        Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length
+        Ebml_SerializeString(glob, DocType, "webm"); //Doc Type
+        Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version
+        Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version
+        Ebml_EndSubElement(glob, &start);
+    }
+    {
+        Ebml_StartSubElement(glob, &glob->startSegment, Segment); //segment; closed later by write_webm_file_footer()
+        glob->position_reference = ftello(glob->stream); /* base for the relative seek and cue positions */
+        glob->framerate = *fps;
+        write_webm_seek_info(glob); /* first copy; written again by write_webm_file_footer() */
+
+        {
+            EbmlLoc trackStart;
+            glob->track_pos = ftello(glob->stream);
+            Ebml_StartSubElement(glob, &trackStart, Tracks);
+            {
+                unsigned int trackNumber = 1;
+                uint64_t     trackID = 0;
+
+                EbmlLoc start;
+                Ebml_StartSubElement(glob, &start, TrackEntry);
+                Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+                Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+                Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
+                Ebml_SerializeString(glob, CodecID, "V_VP8");
+                {
+                    unsigned int pixelWidth = cfg->g_w;
+                    unsigned int pixelHeight = cfg->g_h;
+                    float        frameRate   = (float)fps->num/(float)fps->den; /* NOTE(review): assumes fps->den != 0 */
+
+                    EbmlLoc videoStart;
+                    Ebml_StartSubElement(glob, &videoStart, Video);
+                    Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
+                    Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+                    Ebml_SerializeFloat(glob, FrameRate, frameRate);
+                    Ebml_EndSubElement(glob, &videoStart); //Video
+                }
+                Ebml_EndSubElement(glob, &start); //Track Entry
+            }
+            Ebml_EndSubElement(glob, &trackStart);
+        }
+        // segment element is open
+    }
+}
+
+
+static void
+write_webm_block(EbmlGlobal                *glob, /* writer state; cluster and cue bookkeeping is updated */
+                 const vpx_codec_enc_cfg_t *cfg,  /* g_timebase converts the packet PTS to milliseconds */
+                 const vpx_codec_cx_pkt_t  *pkt)  /* compressed frame packet to store */
+{ /* Write pkt as a SimpleBlock, first opening a new cluster on a keyframe or when the block timecode would overflow. */
+    unsigned long  block_length;
+    unsigned char  track_number;
+    unsigned short block_timecode = 0;
+    unsigned char  flags;
+    uint64_t       pts_ms;
+    int            start_cluster = 0, is_keyframe;
+
+    /* Calculate the PTS of this frame in milliseconds */
+    pts_ms = pkt->data.frame.pts * 1000
+             * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
+    if(pts_ms <= glob->last_pts_ms) /* force strictly increasing timestamps */
+        pts_ms = glob->last_pts_ms + 1;
+    glob->last_pts_ms = pts_ms;
+
+    /* Calculate the relative time of this block */
+    if(pts_ms - glob->cluster_timecode > SHRT_MAX) /* offset no longer fits the 2-byte block timecode */
+        start_cluster = 1;
+    else
+        block_timecode = pts_ms - glob->cluster_timecode;
+
+    is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
+    if(start_cluster || is_keyframe)
+    {
+        if(glob->cluster_open)
+            Ebml_EndSubElement(glob, &glob->startCluster);
+
+        /* Open the new cluster */
+        block_timecode = 0;
+        glob->cluster_open = 1;
+        glob->cluster_timecode = pts_ms;
+        glob->cluster_pos = ftello(glob->stream);
+        Ebml_StartSubElement(glob, &glob->startCluster, Cluster); //cluster
+        Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
+
+        /* Save a cue point if this is a keyframe. */
+        if(is_keyframe)
+        {
+            struct cue_entry *cue = realloc(glob->cue_list,
+                                     (glob->cues+1) * sizeof(struct cue_entry));
+            if(!cue) /* keep the old list intact and stop, rather than dereference NULL */
+            {
+                fprintf(stderr, "\nFailed to realloc cue list.\n");
+                exit(EXIT_FAILURE);
+            }
+            glob->cue_list = cue;
+            cue += glob->cues++; /* point at the newly added last entry */
+            cue->time = glob->cluster_timecode;
+            cue->loc = glob->cluster_pos;
+        }
+    }
+
+    /* Write the Simple Block */
+    Ebml_WriteID(glob, SimpleBlock);
+    block_length = pkt->data.frame.sz + 4; /* payload plus track number (1), timecode (2) and flags (1) */
+    block_length |= 0x10000000; /* 4-byte size marker; NOTE(review): lengths of 0x10000000 or more would collide with it */
+    Ebml_Serialize(glob, &block_length, 4);
+
+    track_number = 1;
+    track_number |= 0x80; /* 1-byte size marker on the track number */
+    Ebml_Write(glob, &track_number, 1);
+    Ebml_Serialize(glob, &block_timecode, 2);
+
+    flags = 0;
+    if(is_keyframe)
+        flags |= 0x80;
+    if(pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
+        flags |= 0x08;
+    Ebml_Write(glob, &flags, 1);
+    Ebml_Write(glob, pkt->data.frame.buf, pkt->data.frame.sz);
+}
+
+
+static void
+write_webm_file_footer(EbmlGlobal *glob)
+{ /* Finish the file: close the open cluster, write the Cues, close the Segment and rewrite the seek info. */
+
+    if(glob->cluster_open)
+        Ebml_EndSubElement(glob, &glob->startCluster);
+
+    {
+        EbmlLoc cues_start;
+        unsigned int i; /* same type as glob->cues, so the loop bound is not a signed/unsigned comparison */
+
+        glob->cue_pos = ftello(glob->stream);
+        Ebml_StartSubElement(glob, &cues_start, Cues);
+        for(i=0; i<glob->cues; i++)
+        {
+            struct cue_entry *cue = &glob->cue_list[i];
+            EbmlLoc point_start;
+
+            Ebml_StartSubElement(glob, &point_start, CuePoint);
+            {
+                EbmlLoc positions_start;
+
+                Ebml_SerializeUnsigned(glob, CueTime, cue->time);
+
+                Ebml_StartSubElement(glob, &positions_start, CueTrackPositions);
+                Ebml_SerializeUnsigned(glob, CueTrack, 1);
+                Ebml_SerializeUnsigned64(glob, CueClusterPosition,
+                                         cue->loc - glob->position_reference);
+                /* No CueBlockNumber is written; struct cue_entry does not record one. */
+                Ebml_EndSubElement(glob, &positions_start);
+            }
+            Ebml_EndSubElement(glob, &point_start);
+        }
+        Ebml_EndSubElement(glob, &cues_start);
+    }
+
+    Ebml_EndSubElement(glob, &glob->startSegment);
+
+    /* Patch up the seek info block */
+    write_webm_seek_info(glob);
+    fseeko(glob->stream, 0, SEEK_END); /* the seek info rewrite leaves the stream mid-file */
+}
+
+
  #include "args.h"
  
+static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
+        "Output filename");
  static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
                                    "Input file is YV12 ");
  static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
@@ -423,10 +768,15 @@ static const arg_def_t verbosearg       = ARG_DEF("v", "verbose", 0,
          "Show encoder parameters");
  static const arg_def_t psnrarg          = ARG_DEF(NULL, "psnr", 0,
          "Show PSNR in status line");
+static const arg_def_t framerate        = ARG_DEF(NULL, "fps", 1,
+        "Stream frame rate (rate/scale)");
+static const arg_def_t use_ivf          = ARG_DEF(NULL, "ivf", 0,
+        "Output IVF (default is WebM)");
  static const arg_def_t *main_args[] =
  {
-    &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl,
-    &verbosearg, &psnrarg,
+    &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline,
+    &best_dl, &good_dl, &rt_dl,
+    &verbosearg, &psnrarg, &use_ivf, &framerate,
      NULL
  };
  
@@ -450,7 +800,7 @@ static const arg_def_t lag_in_frames    = ARG_DEF(NULL, "lag-in-frames", 1,
  static const arg_def_t *global_args[] =
  {
      &use_yv12, &use_i420, &usage, &threads, &profile,
-    &width, &height, &timebase, &error_resilient,
+    &width, &height, &timebase, &framerate, &error_resilient,
      &lag_in_frames, NULL
  };
  
@@ -560,7 +910,8 @@ static void usage_exit()
  {
      int i;
  
-    fprintf(stderr, "Usage: %s <options> src_filename dst_filename\n", exec_name);
+    fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
+            exec_name);
  
      fprintf(stderr, "\nOptions:\n");
      arg_show_usage(stdout, main_args);
@@ -614,10 +965,13 @@ int main(int argc, const char **argv_)
      static const int        *ctrl_args_map = NULL;
      int                      verbose = 0, show_psnr = 0;
      int                      arg_use_i420 = 1;
-    int                      arg_have_timebase = 0;
      unsigned long            cx_time = 0;
      unsigned int             file_type, fourcc;
      y4m_input                y4m;
+    struct vpx_rational      arg_framerate = {30, 1};
+    int                      arg_have_framerate = 0;
+    int                      write_webm = 1;
+    EbmlGlobal               ebml = {0};
  
      exec_name = argv_[0];
  
@@ -689,6 +1043,15 @@ int main(int argc, const char **argv_)
              arg_limit = arg_parse_uint(&arg);
          else if (arg_match(&arg, &psnrarg, argi))
              show_psnr = 1;
+        else if (arg_match(&arg, &framerate, argi))
+        {
+            arg_framerate = arg_parse_rational(&arg);
+            arg_have_framerate = 1;
+        }
+        else if (arg_match(&arg, &use_ivf, argi))
+            write_webm = 0;
+        else if (arg_match(&arg, &outputfile, argi))
+            out_fn = arg.val;
          else
              argj++;
      }
@@ -720,6 +1083,11 @@ int main(int argc, const char **argv_)
          return EXIT_FAILURE;
      }
  
+    /* Change the default timebase to a high enough value so that the encoder
+     * will always create strictly increasing timestamps.
+     */
+    cfg.g_timebase.den = 1000;
+
      /* Now parse the remainder of the parameters. */
      for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
      {
@@ -735,10 +1103,7 @@ int main(int argc, const char **argv_)
          else if (arg_match(&arg, &height, argi))
              cfg.g_h = arg_parse_uint(&arg);
          else if (arg_match(&arg, &timebase, argi))
-        {
              cfg.g_timebase = arg_parse_rational(&arg);
-            arg_have_timebase = 1;
-        }
          else if (arg_match(&arg, &error_resilient, argi))
              cfg.g_error_resilient = arg_parse_uint(&arg);
          else if (arg_match(&arg, &lag_in_frames, argi))
@@ -851,11 +1216,13 @@ int main(int argc, const char **argv_)
  
      /* Handle non-option arguments */
      in_fn = argv[0];
-    out_fn = argv[1];
  
-    if (!in_fn || !out_fn)
+    if (!in_fn)
          usage_exit();
  
+    if(!out_fn)
+        die("Error: Output file is required (specify with -o)\n");
+
      memset(&stats, 0, sizeof(stats));
  
      for (pass = one_pass_only ? one_pass_only - 1 : 0; pass < arg_passes; pass++)
@@ -883,16 +1250,16 @@ int main(int argc, const char **argv_)
                  file_type = FILE_TYPE_Y4M;
                  cfg.g_w = y4m.pic_w;
                  cfg.g_h = y4m.pic_h;
+
                  /* Use the frame rate from the file only if none was specified
                   * on the command-line.
                   */
-                if (!arg_have_timebase)
+                if (!arg_have_framerate)
                  {
-                    cfg.g_timebase.num = y4m.fps_d;
-                    cfg.g_timebase.den = y4m.fps_n;
-                    /* And don't reset it in the second pass.*/
-                    arg_have_timebase = 1;
+                    arg_framerate.num = y4m.fps_n;
+                    arg_framerate.den = y4m.fps_d;
                  }
+
                  arg_use_i420 = 0;
              }
              else
@@ -972,13 +1339,6 @@ int main(int argc, const char **argv_)
              else
                  vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12,
                                cfg.g_w, cfg.g_h, 1);
-
-            // This was added so that ivfenc will create monotically increasing
-            // timestamps.  Since we create new timestamps for alt-reference frames
-            // we need to make room in the series of timestamps.  Since there can
-            // only be 1 alt-ref frame ( current bitstream) multiplying by 2
-            // gives us enough room.
-            cfg.g_timebase.den *= 2;
          }
  
          outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
@@ -989,6 +1349,12 @@ int main(int argc, const char **argv_)
              return EXIT_FAILURE;
          }
  
+        if(write_webm && fseek(outfile, 0, SEEK_CUR))
+        {
+            fprintf(stderr, "WebM output to pipes not supported.\n");
+            return EXIT_FAILURE;
+        }
+
          if (stats_fn)
          {
              if (!stats_open_file(&stats, stats_fn, pass))
@@ -1018,7 +1384,13 @@ int main(int argc, const char **argv_)
  
  #endif
  
-        write_ivf_file_header(outfile, &cfg, codec->fourcc, 0);
+        if(write_webm)
+        {
+            ebml.stream = outfile;
+            write_webm_file_header(&ebml, &cfg, &arg_framerate);
+        }
+        else
+            write_ivf_file_header(outfile, &cfg, codec->fourcc, 0);
  
  
          /* Construct Encoder Context */
@@ -1047,6 +1419,7 @@ int main(int argc, const char **argv_)
              vpx_codec_iter_t iter = NULL;
              const vpx_codec_cx_pkt_t *pkt;
              struct vpx_usec_timer timer;
+            int64_t frame_start;
  
              if (!arg_limit || frames_in < arg_limit)
              {
@@ -1065,10 +1438,12 @@ int main(int argc, const char **argv_)
  
              vpx_usec_timer_start(&timer);
  
-            // since we halved our timebase we need to double the timestamps
-            // and duration we pass in.
-            vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, (frames_in - 1) * 2,
-                             2, 0, arg_deadline);
+            frame_start = (cfg.g_timebase.den * (int64_t)(frames_in - 1)
+                          * arg_framerate.den) / cfg.g_timebase.num / arg_framerate.num;
+            vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, frame_start,
+                             cfg.g_timebase.den * arg_framerate.den
+                             / cfg.g_timebase.num / arg_framerate.num,
+                             0, arg_deadline);
              vpx_usec_timer_mark(&timer);
              cx_time += vpx_usec_timer_elapsed(&timer);
              ctx_exit_on_error(&encoder, "Failed to encode frame");
@@ -1084,8 +1459,15 @@ int main(int argc, const char **argv_)
                      frames_out++;
                      fprintf(stderr, " %6luF",
                              (unsigned long)pkt->data.frame.sz);
-                    write_ivf_frame_header(outfile, pkt);
-                    fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
+                    if(write_webm)
+                    {
+                        write_webm_block(&ebml, &cfg, pkt);
+                    }
+                    else
+                    {
+                        write_ivf_frame_header(outfile, pkt);
+                        fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
+                    }
                      nbytes += pkt->data.raw.sz;
                      break;
                  case VPX_CODEC_STATS_PKT:
@@ -1116,14 +1498,11 @@ int main(int argc, const char **argv_)
              fflush(stdout);
          }
  
-        /* this bitrate calc is simplified and relies on the fact that this
-         * application uses 1/timebase for framerate.
-         */
          fprintf(stderr,
                 "\rPass %d/%d frame %4d/%-4d %7ldB %7ldb/f %7"PRId64"b/s"
                 " %7lu %s (%.2f fps)\033[K", pass + 1,
                 arg_passes, frames_in, frames_out, nbytes, nbytes * 8 / frames_in,
-               nbytes * 8 *(int64_t)cfg.g_timebase.den/2/ cfg.g_timebase.num / frames_in,
+               nbytes * 8 *(int64_t)arg_framerate.num / arg_framerate.den / frames_in,
                 cx_time > 9999999 ? cx_time / 1000 : cx_time,
                 cx_time > 9999999 ? "ms" : "us",
                 (float)frames_in * 1000000.0 / (float)cx_time);
@@ -1132,8 +1511,15 @@ int main(int argc, const char **argv_)
  
          fclose(infile);
  
-        if (!fseek(outfile, 0, SEEK_SET))
-            write_ivf_file_header(outfile, &cfg, codec->fourcc, frames_out);
+        if(write_webm)
+        {
+            write_webm_file_footer(&ebml);
+        }
+        else
+        {
+            if (!fseek(outfile, 0, SEEK_SET))
+                write_ivf_file_header(outfile, &cfg, codec->fourcc, frames_out);
+        }
  
          fclose(outfile);
          stats_close(&stats);
author	Jim Bankoski <jimbankoski@google.com>
	Tue, 26 Oct 2010 11:34:57 +0000 (07:34 -0400)
committer	Jim Bankoski <jimbankoski@google.com>
	Tue, 26 Oct 2010 11:34:57 +0000 (07:34 -0400)
args.c		patch \| blob \| history
build/make/Makefile		patch \| blob \| history
build/make/configure.sh		patch \| blob \| history
configure		patch \| blob \| history
examples.mk		patch \| blob \| history
ivfdec.c	[deleted file]	patch \| blob \| history
libmkv/EbmlBufferWriter.c	[new file with mode: 0644]	patch \| blob
libmkv/EbmlBufferWriter.h	[new file with mode: 0644]	patch \| blob
libmkv/EbmlIDs.h	[new file with mode: 0644]	patch \| blob
libmkv/EbmlWriter.c	[new file with mode: 0644]	patch \| blob
libmkv/EbmlWriter.h	[new file with mode: 0644]	patch \| blob
libmkv/Makefile	[new file with mode: 0644]	patch \| blob
libmkv/WebMElement.c	[new file with mode: 0644]	patch \| blob
libmkv/WebMElement.h	[new file with mode: 0644]	patch \| blob
libmkv/testlibmkv.c	[new file with mode: 0644]	patch \| blob
libs.mk		patch \| blob \| history
nestegg/.gitignore	[new file with mode: 0644]	patch \| blob
nestegg/AUTHORS	[new file with mode: 0644]	patch \| blob
nestegg/INSTALL	[new file with mode: 0644]	patch \| blob
nestegg/LICENSE	[new file with mode: 0644]	patch \| blob
nestegg/Makefile.am	[new file with mode: 0644]	patch \| blob
nestegg/README	[new file with mode: 0644]	patch \| blob
nestegg/TODO	[new file with mode: 0644]	patch \| blob
nestegg/configure.ac	[new file with mode: 0644]	patch \| blob
nestegg/docs/Doxyfile.in	[new file with mode: 0644]	patch \| blob
nestegg/docs/Makefile.am	[new file with mode: 0644]	patch \| blob
nestegg/halloc/README	[new file with mode: 0644]	patch \| blob
nestegg/halloc/halloc.h	[new file with mode: 0644]	patch \| blob
nestegg/halloc/src/align.h	[new file with mode: 0644]	patch \| blob
nestegg/halloc/src/halloc.c	[new file with mode: 0644]	patch \| blob
nestegg/halloc/src/hlist.h	[new file with mode: 0644]	patch \| blob
nestegg/halloc/src/macros.h	[new file with mode: 0644]	patch \| blob
nestegg/include/nestegg/nestegg.h	[new file with mode: 0644]	patch \| blob
nestegg/m4/as-ac-expand.m4	[new file with mode: 0644]	patch \| blob
nestegg/m4/ax_create_stdint_h.m4	[new file with mode: 0644]	patch \| blob
nestegg/m4/pkg.m4	[new file with mode: 0644]	patch \| blob
nestegg/nestegg-uninstalled.pc.in	[new file with mode: 0644]	patch \| blob
nestegg/nestegg.pc.in	[new file with mode: 0644]	patch \| blob
nestegg/src/nestegg.c	[new file with mode: 0644]	patch \| blob
nestegg/test/test.c	[new file with mode: 0644]	patch \| blob
release.sh	[deleted file]	patch \| blob \| history
solution.mk		patch \| blob \| history
vp8/common/arm/arm_systemdependent.c	[new file with mode: 0644]	patch \| blob
vp8/common/arm/idct_arm.h		patch \| blob \| history
vp8/common/arm/loopfilter_arm.h		patch \| blob \| history
vp8/common/arm/neon/loopfilter_neon.asm	[new file with mode: 0644]	patch \| blob
vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/neon/mbloopfilter_neon.asm	[new file with mode: 0644]	patch \| blob
vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm	[deleted file]	patch \| blob \| history
vp8/common/arm/recon_arm.h		patch \| blob \| history
vp8/common/arm/subpixel_arm.h		patch \| blob \| history
vp8/common/arm/systemdependent.c	[deleted file]	patch \| blob \| history
vp8/common/blockd.h		patch \| blob \| history
vp8/common/generic/systemdependent.c		patch \| blob \| history
vp8/common/onyxc_int.h		patch \| blob \| history
vp8/common/postproc.c		patch \| blob \| history
vp8/common/ppflags.h		patch \| blob \| history
vp8/common/reconinter.c		patch \| blob \| history
vp8/common/textblit.c		patch \| blob \| history
vp8/decoder/arm/arm_dsystemdependent.c	[new file with mode: 0644]	patch \| blob
vp8/decoder/arm/dequantize_arm.c		patch \| blob \| history
vp8/decoder/arm/dequantize_arm.h		patch \| blob \| history
vp8/decoder/arm/dsystemdependent.c	[deleted file]	patch \| blob \| history
vp8/decoder/decodframe.c		patch \| blob \| history
vp8/decoder/dequantize.c		patch \| blob \| history
vp8/decoder/generic/dsystemdependent.c		patch \| blob \| history
vp8/decoder/onyxd_if.c		patch \| blob \| history
vp8/decoder/threading.c		patch \| blob \| history
vp8/encoder/arm/arm_csystemdependent.c	[new file with mode: 0644]	patch \| blob
vp8/encoder/arm/armv5te/boolhuff_armv5te.asm	[moved from vp8/encoder/arm/neon/boolhuff_armv7.asm with 95% similarity]	patch \| blob \| history
vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm	[moved from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm with 93% similarity]	patch \| blob \| history
vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm	[moved from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm with 94% similarity]	patch \| blob \| history
vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm	[moved from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm with 95% similarity]	patch \| blob \| history
vp8/encoder/arm/csystemdependent.c	[deleted file]	patch \| blob \| history
vp8/encoder/arm/dct_arm.h		patch \| blob \| history
vp8/encoder/arm/encodemb_arm.h		patch \| blob \| history
vp8/encoder/arm/mcomp_arm.c		patch \| blob \| history
vp8/encoder/arm/quantize_arm.c		patch \| blob \| history
vp8/encoder/arm/variance_arm.h		patch \| blob \| history
vp8/encoder/bitstream.h		patch \| blob \| history
vp8/encoder/block.h		patch \| blob \| history
vp8/encoder/encodeframe.c		patch \| blob \| history
vp8/encoder/encodeintra.c		patch \| blob \| history
vp8/encoder/encodemb.c		patch \| blob \| history
vp8/encoder/firstpass.c		patch \| blob \| history
vp8/encoder/firstpass.h		patch \| blob \| history
vp8/encoder/generic/csystemdependent.c		patch \| blob \| history
vp8/encoder/mcomp.c		patch \| blob \| history
vp8/encoder/onyx_if.c		patch \| blob \| history
vp8/encoder/onyx_int.h		patch \| blob \| history
vp8/encoder/picklpf.c		patch \| blob \| history
vp8/encoder/quantize.c		patch \| blob \| history
vp8/encoder/temporal_filter.c	[new file with mode: 0644]	patch \| blob
vp8/encoder/temporal_filter.h	[new file with mode: 0644]	patch \| blob
vp8/encoder/x86/encodemb_x86.h		patch \| blob \| history
vp8/encoder/x86/fwalsh_sse2.asm		patch \| blob \| history
vp8/encoder/x86/quantize_sse2.asm		patch \| blob \| history
vp8/encoder/x86/sad_mmx.asm		patch \| blob \| history
vp8/encoder/x86/sad_sse2.asm		patch \| blob \| history
vp8/encoder/x86/sad_sse3.asm		patch \| blob \| history
vp8/encoder/x86/sad_ssse3.asm		patch \| blob \| history
vp8/encoder/x86/subtract_mmx.asm		patch \| blob \| history
vp8/encoder/x86/subtract_sse2.asm	[new file with mode: 0644]	patch \| blob
vp8/encoder/x86/x86_csystemdependent.c		patch \| blob \| history
vp8/vp8_common.mk		patch \| blob \| history
vp8/vp8_cx_iface.c		patch \| blob \| history
vp8/vp8cx.mk		patch \| blob \| history
vp8/vp8cx_arm.mk		patch \| blob \| history
vp8/vp8dx_arm.mk		patch \| blob \| history
vpx_ports/arm.h	[new file with mode: 0644]	patch \| blob
vpx_ports/arm_cpudetect.c	[new file with mode: 0644]	patch \| blob
vpx_scale/arm/scalesystemdependant.c		patch \| blob \| history
vpxdec.c	[new file with mode: 0644]	patch \| blob
vpxenc.c	[moved from ivfenc.c with 72% similarity]	patch \| blob \| history