]> granicus.if.org Git - php/commitdiff
Bundle pcre-8.35 with sljit support
author Dmitry Stogov <dmitry@zend.com>
Fri, 6 Jun 2014 14:46:19 +0000 (18:46 +0400)
committer Dmitry Stogov <dmitry@zend.com>
Fri, 6 Jun 2014 14:46:19 +0000 (18:46 +0400)
93 files changed:
ext/pcre/config.w32
ext/pcre/config0.m4
ext/pcre/pcrelib/AUTHORS
ext/pcre/pcrelib/ChangeLog
ext/pcre/pcrelib/LICENCE
ext/pcre/pcrelib/NEWS
ext/pcre/pcrelib/README
ext/pcre/pcrelib/config.h
ext/pcre/pcrelib/dftables.c
ext/pcre/pcrelib/doc/pcre.txt
ext/pcre/pcrelib/pcre.h
ext/pcre/pcrelib/pcre_chartables.c
ext/pcre/pcrelib/pcre_compile.c
ext/pcre/pcrelib/pcre_config.c
ext/pcre/pcrelib/pcre_exec.c
ext/pcre/pcrelib/pcre_fullinfo.c
ext/pcre/pcrelib/pcre_get.c
ext/pcre/pcrelib/pcre_globals.c
ext/pcre/pcrelib/pcre_internal.h
ext/pcre/pcrelib/pcre_jit_compile.c [new file with mode: 0644]
ext/pcre/pcrelib/pcre_maketables.c
ext/pcre/pcrelib/pcre_newline.c
ext/pcre/pcrelib/pcre_ord2utf8.c
ext/pcre/pcrelib/pcre_printint.c [new file with mode: 0644]
ext/pcre/pcrelib/pcre_printint.src [deleted file]
ext/pcre/pcrelib/pcre_refcount.c
ext/pcre/pcrelib/pcre_study.c
ext/pcre/pcrelib/pcre_tables.c
ext/pcre/pcrelib/pcre_ucd.c
ext/pcre/pcrelib/pcre_valid_utf8.c
ext/pcre/pcrelib/pcre_version.c
ext/pcre/pcrelib/pcre_xclass.c
ext/pcre/pcrelib/pcreposix.c
ext/pcre/pcrelib/sljit/sljitConfig.h [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitConfigInternal.h [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitExecAllocator.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitLir.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitLir.h [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeARM_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeARM_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeMIPS_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeMIPS_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativePPC_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativePPC_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativePPC_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeSPARC_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeTILEGX-encoder.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeX86_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeX86_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeX86_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitUtils.c [new file with mode: 0644]
ext/pcre/pcrelib/testdata/greppatN4
ext/pcre/pcrelib/testdata/saved16BE-1
ext/pcre/pcrelib/testdata/saved16LE-1
ext/pcre/pcrelib/testdata/saved32BE-1
ext/pcre/pcrelib/testdata/saved32LE-1
ext/pcre/pcrelib/testdata/testinput18
ext/pcre/pcrelib/testdata/testinput2
ext/pcre/pcrelib/testdata/testinput25
ext/pcre/pcrelib/testdata/testinput3
ext/pcre/pcrelib/testdata/testinput4
ext/pcre/pcrelib/testdata/testinput5
ext/pcre/pcrelib/testdata/testinput6
ext/pcre/pcrelib/testdata/testinput7
ext/pcre/pcrelib/testdata/testoutput12
ext/pcre/pcrelib/testdata/testoutput13
ext/pcre/pcrelib/testdata/testoutput14
ext/pcre/pcrelib/testdata/testoutput15
ext/pcre/pcrelib/testdata/testoutput16
ext/pcre/pcrelib/testdata/testoutput17
ext/pcre/pcrelib/testdata/testoutput18-16
ext/pcre/pcrelib/testdata/testoutput18-32
ext/pcre/pcrelib/testdata/testoutput19
ext/pcre/pcrelib/testdata/testoutput2
ext/pcre/pcrelib/testdata/testoutput21-16
ext/pcre/pcrelib/testdata/testoutput21-32
ext/pcre/pcrelib/testdata/testoutput22-16
ext/pcre/pcrelib/testdata/testoutput22-32
ext/pcre/pcrelib/testdata/testoutput23
ext/pcre/pcrelib/testdata/testoutput25
ext/pcre/pcrelib/testdata/testoutput3
ext/pcre/pcrelib/testdata/testoutput4
ext/pcre/pcrelib/testdata/testoutput5
ext/pcre/pcrelib/testdata/testoutput6
ext/pcre/pcrelib/testdata/testoutput7
ext/pcre/pcrelib/testdata/testoutput8
ext/pcre/pcrelib/testdata/wintestoutput3
ext/pcre/tests/006.phpt
ext/pcre/tests/recursion_limit.phpt

index 8279f0a3318c1dda31bf78ffade5bfd3b265347e..594b1cb474907aea0bd1dab5e644f1dd89e42b9a 100644 (file)
@@ -3,7 +3,7 @@
 
 EXTENSION("pcre", "php_pcre.c", false /* never shared */,
                "-Iext/pcre/pcrelib");
-ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
+ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c pcre_jit_compile.c", "pcre");
 ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
 
 AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');
index 4f8af76e63ab0d5de3e0cba0e08e52b36b844885..bfe2009aa097690ae61821c4343278aba4d81ec6 100644 (file)
@@ -58,7 +58,8 @@ PHP_ARG_WITH(pcre-regex,,
                                 pcrelib/pcre_maketables.c pcrelib/pcre_newline.c \
                                 pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c \
                                 pcrelib/pcre_tables.c pcrelib/pcre_valid_utf8.c \
-                                pcrelib/pcre_version.c pcrelib/pcre_xclass.c"
+                                pcrelib/pcre_version.c pcrelib/pcre_xclass.c \
+                                pcrelib/pcre_jit_compile.c"
     PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib"
     PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS)
     PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
index 97d8c71dd67b43110677aa8b7defa25872ebd7f6..5eee1af4c6fd252594ccc538d121c13e4ac5e431 100644 (file)
@@ -8,7 +8,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2013 University of Cambridge
+Copyright (c) 1997-2014 University of Cambridge
 All rights reserved
 
 
@@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2013 Zoltan Herczeg
+Copyright(c) 2010-2014 Zoltan Herczeg
 All rights reserved.
 
 
@@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2013 Zoltan Herczeg
+Copyright(c) 2009-2014 Zoltan Herczeg
 All rights reserved.
 
 
index 1f1e86003563de96e0d1756e3b49bd10cb05cc9a..7801ef841179c7ca8030646d62af4ea85e6e50db 100644 (file)
@@ -1,6 +1,126 @@
 ChangeLog for PCRE
 ------------------
 
+Version 8.35 04-April-2014
+--------------------------
+
+1.  A new flag is set, when property checks are present in an XCLASS.
+    When this flag is not set, PCRE can perform certain optimizations
+    such as studying these XCLASS-es.
+
+2.  The auto-possessification of character sets was improved: a normal
+    and an extended character set can be compared now. Furthermore
+    the JIT compiler optimizes more character set checks.
+
+3.  Got rid of some compiler warnings for potentially uninitialized variables
+    that show up only when compiled with -O2.
+
+4.  A pattern such as (?=ab\K) that uses \K in an assertion can set the start
+    of a match later than the end of the match. The pcretest program was not
+    handling the case sensibly - it was outputting from the start to the next
+    binary zero. It now reports this situation in a message, and outputs the
+    text from the end to the start.
+
+5.  Fast forward search is improved in JIT. Instead of the first three
+    characters, any three characters with fixed position can be searched.
+    Search order: first, last, middle.
+
+6.  Improve character range checks in JIT. Characters are read by an imprecise
+    function now, which returns with an unknown value if the character code is
+    above a certain threshold (e.g: 256). The only limitation is that the value
+    must be bigger than the threshold as well. This function is useful, when
+    the characters above the threshold are handled in the same way.
+
+7.  The macros whose names start with RAWUCHAR are placeholders for a future
+    mode in which only the bottom 21 bits of 32-bit data items are used. To
+    make this more memorable for those maintaining the code, the names have
+    been changed to start with UCHAR21, and an extensive comment has been added
+    to their definition.
+
+8.  Add missing (new) files sljitNativeTILEGX.c and sljitNativeTILEGX-encoder.c
+    to the export list in Makefile.am (they were accidentally omitted from the
+    8.34 tarball).
+
+9.  The informational output from pcretest used the phrase "starting byte set"
+    which is inappropriate for the 16-bit and 32-bit libraries. As the output
+    for "first char" and "need char" really means "non-UTF-char", I've changed
+    "byte" to "char", and slightly reworded the output. The documentation about
+    these values has also been (I hope) clarified.
+
+10. Another JIT related optimization: use table jumps for selecting the correct
+    backtracking path, when more than four alternatives are present inside a
+    bracket.
+
+11. Empty match is not possible, when the minimum length is greater than zero,
+    and there is no \K in the pattern. JIT should avoid empty match checks in
+    such cases.
+
+12. In a caseless character class with UCP support, when a character with more
+    than one alternative case was not the first character of a range, not all
+    the alternative cases were added to the class. For example, s and \x{17f}
+    are both alternative cases for S: the class [RST] was handled correctly,
+    but [R-T] was not.
+
+13. The configure.ac file always checked for pthread support when JIT was
+    enabled. This is not used in Windows, so I have put this test inside a
+    check for the presence of windows.h (which was already tested for).
+
+14. Improve pattern prefix search by a simplified Boyer-Moore algorithm in JIT.
+    The algorithm provides a way to skip certain starting offsets, and is usually
+    faster than linear prefix searches.
+
+15. Change 13 for 8.20 updated RunTest to check for the 'fr' locale as well
+    as for 'fr_FR' and 'french'. For some reason, however, it then used the
+    Windows-specific input and output files, which have 'french' screwed in.
+    So this could never have worked. One of the problems with locales is that
+    they aren't always the same. I have now updated RunTest so that it checks
+    the output of the locale test (test 3) against three different output
+    files, and it allows the test to pass if any one of them matches. With luck
+    this should make the test pass on some versions of Solaris where it was
+    failing. Because of the uncertainty, the script previously did not stop if
+    test 3 failed; it now does. If further versions of a French locale ever
+    come to light, they can now easily be added.
+
+16. If --with-pcregrep-bufsize was given a non-integer value such as "50K",
+    there was a message during ./configure, but it did not stop. This now
+    provokes an error. The invalid example in README has been corrected.
+    If a value less than the minimum is given, the minimum value has always
+    been used, but now a warning is given.
+
+17. If --enable-bsr-anycrlf was set, the special 16/32-bit test failed. This
+    was a bug in the test system, which is now fixed. Also, the list of various
+    configurations that are tested for each release did not have one with both
+    16/32 bits and --enable-bsr-anycrlf. It now does.
+
+18. pcretest was missing "-C bsr" for displaying the \R default setting.
+
+19. Little endian PowerPC systems are supported now by the JIT compiler.
+
+20. The fast forward newline mechanism could enter an infinite loop on
+    certain invalid UTF-8 input. Although we don't support these cases
+    this issue can be fixed by a performance optimization.
+
+21. Change 33 of 8.34 is not sufficient to ensure stack safety because it does
+    not take account of existing stack usage. There is now a new global
+    variable called pcre_stack_guard that can be set to point to an external
+    function to check stack availability. It is called at the start of
+    processing every parenthesized group.
+
+22. A typo in the code meant that in ungreedy mode the max/min qualifier
+    behaved like a min-possessive qualifier, and, for example, /a{1,3}b/U did
+    not match "ab".
+
+23. When UTF was disabled, the JIT program reported some incorrect compile
+    errors. These messages are silenced now.
+
+24. Experimental support for ARM-64 and MIPS-64 has been added to the JIT
+    compiler.
+
+25. Change all the temporary files used in RunGrepTest to be different to those
+    used by RunTest so that the tests can be run simultaneously, for example by
+    "make -j check".
+
+
 Version 8.34 15-December-2013
 -----------------------------
 
index 3aff6a62c002aa1464d6c743727deb373f0d71a8..602e4ae680467f9db3260f16ae0f65fcfa01544a 100644 (file)
@@ -24,7 +24,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2013 University of Cambridge
+Copyright (c) 1997-2014 University of Cambridge
 All rights reserved.
 
 
@@ -35,7 +35,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2013 Zoltan Herczeg
+Copyright(c) 2010-2014 Zoltan Herczeg
 All rights reserved.
 
 
@@ -46,7 +46,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2013 Zoltan Herczeg
+Copyright(c) 2009-2014 Zoltan Herczeg
 All rights reserved.
 
 
index 5f52f1534601de9b4b9079f2c7bbc4e62485681f..6331e9908d1261de213d4b9571e6def61bd9ce15 100644 (file)
@@ -1,6 +1,17 @@
 News about PCRE releases
 ------------------------
 
+Release 8.35 04-April-2014
+--------------------------
+
+There have been performance improvements for classes containing non-ASCII
+characters and the "auto-possessification" feature has been extended. Other
+minor improvements have been implemented and bugs fixed. There is a new callout
+feature to enable applications to do detailed stack checks at compile time, to
+avoid running out of stack for deeply nested parentheses. The JIT compiler has
+been extended with experimental support for ARM-64, MIPS-64, and PPC-LE.
+
+
 Release 8.34 15-December-2013
 -----------------------------
 
index 51197df7213433e276f4d439d6425249fcfd5fc5..88f2dfd4efd677651ddc8ddb9eb19d385891ea43 100644 (file)
@@ -85,11 +85,12 @@ documentation is supplied in two other forms:
   1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
      doc/pcretest.txt in the source distribution. The first of these is a
      concatenation of the text forms of all the section 3 man pages except
-     those that summarize individual functions. The other two are the text
-     forms of the section 1 man pages for the pcregrep and pcretest commands.
-     These text forms are provided for ease of scanning with text editors or
-     similar tools. They are installed in <prefix>/share/doc/pcre, where
-     <prefix> is the installation prefix (defaulting to /usr/local).
+     the listing of pcredemo.c and those that summarize individual functions.
+     The other two are the text forms of the section 1 man pages for the
+     pcregrep and pcretest commands. These text forms are provided for ease of
+     scanning with text editors or similar tools. They are installed in
+     <prefix>/share/doc/pcre, where <prefix> is the installation prefix
+     (defaulting to /usr/local).
 
   2. A set of files containing all the documentation in HTML form, hyperlinked
      in various ways, and rooted in a file called index.html, is distributed in
@@ -372,12 +373,12 @@ library. They are also documented in the pcrebuild man page.
 
   Of course, the relevant libraries must be installed on your system.
 
-. The default size of internal buffer used by pcregrep can be set by, for
-  example:
+. The default size (in bytes) of the internal buffer used by pcregrep can be
+  set by, for example:
 
-  --with-pcregrep-bufsize=50K
+  --with-pcregrep-bufsize=51200
 
-  The default value is 20K.
+  The value must be a plain integer. The default is 20480.
 
 . It is possible to compile pcretest so that it links with the libreadline
   or libedit libraries, by specifying, respectively,
@@ -987,4 +988,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 05 November 2013
+Last updated: 17 January 2014
index a3b1b5dd38171f43e0aea40812c07cf3645bbc39..1bfc232a0085f0e0b6dfa0400f842780f2fd4dfa 100644 (file)
 #endif
 
 
-/* Exclude these below definitions when building within PHP */
-#ifndef ZEND_API
-
 /* config.h.  Generated from config.h.in by configure.  */
 /* config.h.in.  Generated from configure.ac by autoheader.  */
 
-
 /* PCRE is written in Standard C, but there are a few non-standard things it
 can cope with, allowing it to run on SunOS4 and other "close to standard"
 systems.
 
-In environments that support the facilities, config.h.in is converted by
-"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
-are going to build PCRE "by hand" without using "configure" or CMake, you
-should copy the distributed config.h.generic to config.h, and then edit the
-macro definitions to be the way you need them. You must then add
--DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
-at the start of every source.
+In environments that support the GNU autotools, config.h.in is converted into
+config.h by the "configure" script. In environments that use CMake,
+config-cmake.in is converted into config.h. If you are going to build PCRE "by
+hand" without using "configure" or CMake, you should copy the distributed
+config.h.generic to config.h, and edit the macro definitions to be the way you
+need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
+so that config.h is included at the start of every source.
 
 Alternatively, you can avoid editing by using -D on the compiler command line
-to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
+to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
+but if you do, default values will be taken from config.h for non-boolean
+macros that are not defined on the command line.
+
+Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
+(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
+macros are listed as a commented #undef in config.h.generic. Macros such as
+MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
+surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
 
-PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
-HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
-them both to 0; an emulation function will be used. */
+PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
+HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
+sure both macros are undefined; an emulation function will then be used. */
 
 /* By default, the \R escape sequence matches any Unicode line ending
    character or sequence of characters. If BSR_ANYCRLF is defined (to any
    value), this is changed so that backslash-R matches only CR, LF, or CRLF.
    The build-time default can be overridden by the user of PCRE at runtime. */
-#undef BSR_ANYCRLF
+/* #undef BSR_ANYCRLF */
 
 /* If you are compiling for a system that uses EBCDIC instead of ASCII
    character codes, define this macro to any value. You must also edit the
@@ -64,113 +68,80 @@ them both to 0; an emulation function will be used. */
    strings are in EBCDIC. If you do not define this macro, PCRE will assume
    input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
    a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
-#undef EBCDIC
+/* #undef EBCDIC */
 
 /* In an EBCDIC environment, define this macro to any value to arrange for the
    NL character to be 0x25 instead of the default 0x15. NL plays the role that
    LF does in an ASCII/Unicode environment. The value must also be set in the
    NEWLINE macro below. On systems that can use "configure" or CMake to set
    EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
-#undef EBCDIC_NL25
+/* #undef EBCDIC_NL25 */
 
 /* Define to 1 if you have the `bcopy' function. */
-#ifndef HAVE_BCOPY
-#define HAVE_BCOPY 1
-#endif
+/* #undef HAVE_BCOPY */
 
 /* Define to 1 if you have the <bits/type_traits.h> header file. */
 /* #undef HAVE_BITS_TYPE_TRAITS_H */
 
 /* Define to 1 if you have the <bzlib.h> header file. */
-#ifndef HAVE_BZLIB_H
-#define HAVE_BZLIB_H 1
-#endif
+/* #undef HAVE_BZLIB_H */
 
 /* Define to 1 if you have the <dirent.h> header file. */
-#ifndef HAVE_DIRENT_H
-#define HAVE_DIRENT_H 1
-#endif
+/* #undef HAVE_DIRENT_H */
 
 /* Define to 1 if you have the <dlfcn.h> header file. */
-#ifndef HAVE_DLFCN_H
-#define HAVE_DLFCN_H 1
-#endif
+/* #undef HAVE_DLFCN_H */
 
 /* Define to 1 if you have the <editline/readline.h> header file. */
-/*#undef HAVE_EDITLINE_READLINE_H*/
+/* #undef HAVE_EDITLINE_READLINE_H */
 
 /* Define to 1 if you have the <edit/readline/readline.h> header file. */
 /* #undef HAVE_EDIT_READLINE_READLINE_H */
 
 /* Define to 1 if you have the <inttypes.h> header file. */
-#ifndef HAVE_INTTYPES_H
-#define HAVE_INTTYPES_H 1
-#endif
+/* #undef HAVE_INTTYPES_H */
 
 /* Define to 1 if you have the <limits.h> header file. */
-#ifndef HAVE_LIMITS_H
-#define HAVE_LIMITS_H 1
-#endif
+/* #undef HAVE_LIMITS_H */
 
 /* Define to 1 if the system has the type `long long'. */
-#ifndef HAVE_LONG_LONG
-#define HAVE_LONG_LONG 1
-#endif
+/* #undef HAVE_LONG_LONG */
 
 /* Define to 1 if you have the `memmove' function. */
-#ifndef HAVE_MEMMOVE
-#define HAVE_MEMMOVE 1
-#endif
+/* #undef HAVE_MEMMOVE */
 
 /* Define to 1 if you have the <memory.h> header file. */
-#ifndef HAVE_MEMORY_H
-#define HAVE_MEMORY_H 1
-#endif
+/* #undef HAVE_MEMORY_H */
 
 /* Define if you have POSIX threads libraries and header files. */
-#undef HAVE_PTHREAD
+/* #undef HAVE_PTHREAD */
 
 /* Have PTHREAD_PRIO_INHERIT. */
-#undef HAVE_PTHREAD_PRIO_INHERIT
+/* #undef HAVE_PTHREAD_PRIO_INHERIT */
+
 /* Define to 1 if you have the <readline/history.h> header file. */
-#ifndef HAVE_READLINE_HISTORY_H
-#define HAVE_READLINE_HISTORY_H 1
-#endif
+/* #undef HAVE_READLINE_HISTORY_H */
 
 /* Define to 1 if you have the <readline/readline.h> header file. */
-#ifndef HAVE_READLINE_READLINE_H
-#define HAVE_READLINE_READLINE_H 1
-#endif
+/* #undef HAVE_READLINE_READLINE_H */
 
 /* Define to 1 if you have the <stdint.h> header file. */
-#ifndef HAVE_STDINT_H
-#define HAVE_STDINT_H 1
-#endif
+/* #undef HAVE_STDINT_H */
 
 /* Define to 1 if you have the <stdlib.h> header file. */
-#ifndef HAVE_STDLIB_H
-#define HAVE_STDLIB_H 1
-#endif
+/* #undef HAVE_STDLIB_H */
 
 /* Define to 1 if you have the `strerror' function. */
-#ifndef HAVE_STRERROR
-#define HAVE_STRERROR 1
-#endif
+/* #undef HAVE_STRERROR */
 
 /* Define to 1 if you have the <string> header file. */
-#ifndef HAVE_STRING
-#define HAVE_STRING 1
-#endif
+/* #undef HAVE_STRING */
 
 /* Define to 1 if you have the <strings.h> header file. */
-#ifndef HAVE_STRINGS_H
-#define HAVE_STRINGS_H 1
-#endif
+/* #undef HAVE_STRINGS_H */
 
 /* Define to 1 if you have the <string.h> header file. */
-#ifndef HAVE_STRING_H
-#define HAVE_STRING_H 1
-#endif
+/* #undef HAVE_STRING_H */
 
 /* Define to 1 if you have `strtoimax'. */
 /* #undef HAVE_STRTOIMAX */
@@ -179,63 +150,47 @@ them both to 0; an emulation function will be used. */
 /* #undef HAVE_STRTOLL */
 
 /* Define to 1 if you have `strtoq'. */
-#ifndef HAVE_STRTOQ
-#define HAVE_STRTOQ 1
-#endif
+/* #undef HAVE_STRTOQ */
 
 /* Define to 1 if you have the <sys/stat.h> header file. */
-#ifndef HAVE_SYS_STAT_H
-#define HAVE_SYS_STAT_H 1
-#endif
+/* #undef HAVE_SYS_STAT_H */
 
 /* Define to 1 if you have the <sys/types.h> header file. */
-#ifndef HAVE_SYS_TYPES_H
-#define HAVE_SYS_TYPES_H 1
-#endif
+/* #undef HAVE_SYS_TYPES_H */
 
 /* Define to 1 if you have the <type_traits.h> header file. */
 /* #undef HAVE_TYPE_TRAITS_H */
 
 /* Define to 1 if you have the <unistd.h> header file. */
-#ifndef HAVE_UNISTD_H
-#define HAVE_UNISTD_H 1
-#endif
+/* #undef HAVE_UNISTD_H */
 
 /* Define to 1 if the system has the type `unsigned long long'. */
-#ifndef HAVE_UNSIGNED_LONG_LONG
-#define HAVE_UNSIGNED_LONG_LONG 1
-#endif
+/* #undef HAVE_UNSIGNED_LONG_LONG */
 
-/* Define to 1 or 0, depending whether the compiler supports simple visibility
-   declarations. */
+/* Define to 1 if the compiler supports simple visibility declarations. */
 /* #undef HAVE_VISIBILITY */
 
 /* Define to 1 if you have the <windows.h> header file. */
 /* #undef HAVE_WINDOWS_H */
 
 /* Define to 1 if you have the <zlib.h> header file. */
-#ifndef HAVE_ZLIB_H
-#define HAVE_ZLIB_H 1
-#endif
+/* #undef HAVE_ZLIB_H */
 
 /* Define to 1 if you have `_strtoi64'. */
 /* #undef HAVE__STRTOI64 */
 
-/* Exclude these above definitions when building within PHP */
-#endif
-
 /* The value of LINK_SIZE determines the number of bytes used to store links
    as offsets within the compiled regex. The default is 2, which allows for
    compiled patterns up to 64K long. This covers the vast majority of cases.
    However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
-   for longer patterns in extreme cases. On systems that support it,
-   "configure" can be used to override this default. */
+   for longer patterns in extreme cases. */
 #ifndef LINK_SIZE
 #define LINK_SIZE 2
 #endif
 
 /* Define to the sub-directory in which libtool stores uninstalled libraries.
    */
+/* This is ignored unless you are using libtool. */
 #ifndef LT_OBJDIR
 #define LT_OBJDIR ".libs/"
 #endif
@@ -245,8 +200,7 @@ them both to 0; an emulation function will be used. */
    pcre_exec(). There is a runtime interface for setting a different limit.
    The limit exists in order to catch runaway regular expressions that take
    for ever to determine that they do not match. The default is set very large
-   so that it does not accidentally catch legitimate cases. On systems that
-   support it, "configure" can be used to override this default default. */
+   so that it does not accidentally catch legitimate cases. */
 #ifndef MATCH_LIMIT
 #define MATCH_LIMIT 10000000
 #endif
@@ -258,8 +212,7 @@ them both to 0; an emulation function will be used. */
    used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
    match(). To have any useful effect, it must be less than the value of
    MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
-   a runtime method for setting a different limit. On systems that support it,
-   "configure" can be used to override the default. */
+   a runtime method for setting a different limit. */
 #ifndef MATCH_LIMIT_RECURSION
 #define MATCH_LIMIT_RECURSION MATCH_LIMIT
 #endif
@@ -290,9 +243,6 @@ them both to 0; an emulation function will be used. */
 #define NEWLINE 10
 #endif
 
-/* Define to 1 if your C compiler doesn't accept -c and -o together. */
-/* #undef NO_MINUS_C_MINUS_O */
-
 /* PCRE uses recursive function calls to handle backtracking while matching.
    This can sometimes be a problem on systems that have stacks of limited
    size. Define NO_RECURSE to any value to get a version that doesn't use
@@ -302,8 +252,6 @@ them both to 0; an emulation function will be used. */
    */
 /* #undef NO_RECURSE */
 
-#define PARENS_NEST_LIMIT 250
-
 /* Name of package */
 #define PACKAGE "pcre"
 
@@ -314,7 +262,7 @@ them both to 0; an emulation function will be used. */
 #define PACKAGE_NAME "PCRE"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE 8.32"
+#define PACKAGE_STRING "PCRE 8.35"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre"
@@ -323,33 +271,23 @@ them both to 0; an emulation function will be used. */
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "8.32"
-
-/* to make a symbol visible */
-/* #undef PCRECPP_EXP_DECL */
+#define PACKAGE_VERSION "8.35"
 
-/* to make a symbol visible */
-/* #undef PCRECPP_EXP_DEFN */
+/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
+   parentheses (of any kind) in a pattern. This limits the amount of system
+   stack that is used while compiling a pattern. */
+#ifndef PARENS_NEST_LIMIT
+#define PARENS_NEST_LIMIT 250
+#endif
 
 /* The value of PCREGREP_BUFSIZE determines the size of buffer used by
    pcregrep to hold parts of the file it is searching. This is also the
    minimum value. The actual amount of memory used by pcregrep is three times
    this number, because it allows for the buffering of "before" and "after"
    lines. */
-/* #undef PCREGREP_BUFSIZE */
-
-/* to make a symbol visible */
-/* #undef PCREPOSIX_EXP_DECL */
-
-/* to make a symbol visible */
-/* #undef PCREPOSIX_EXP_DEFN */
-
-/* to make a symbol visible */
-/* #undef PCRE_EXP_DATA_DEFN */
-
-/* to make a symbol visible */
-/* #undef PCRE_EXP_DECL */
-
+#ifndef PCREGREP_BUFSIZE
+#define PCREGREP_BUFSIZE 20480
+#endif
 
 /* If you are compiling for a system other than a Unix-like system or
    Win32, and it needs some magic to be inserted before the definition
@@ -381,23 +319,17 @@ them both to 0; an emulation function will be used. */
 /* #undef PTHREAD_CREATE_JOINABLE */
 
 /* Define to 1 if you have the ANSI C header files. */
-#ifndef STDC_HEADERS
-#define STDC_HEADERS 1
-#endif
-
-/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
-   are able to generate code coverage reports. */
-#undef SUPPORT_GCOV
+/* #undef STDC_HEADERS */
 
 /* Define to any value to enable support for Just-In-Time compiling. */
-#undef SUPPORT_JIT
+#define SUPPORT_JIT
 
 /* Define to any value to allow pcregrep to be linked with libbz2, so that it
    is able to handle .bz2 files. */
 /* #undef SUPPORT_LIBBZ2 */
 
 /* Define to any value to allow pcretest to be linked with libedit. */
-#undef SUPPORT_LIBEDIT
+/* #undef SUPPORT_LIBEDIT */
 
 /* Define to any value to allow pcretest to be linked with libreadline. */
 /* #undef SUPPORT_LIBREADLINE */
@@ -416,24 +348,22 @@ them both to 0; an emulation function will be used. */
 /* #undef SUPPORT_PCRE8 */
 
 /* Define to any value to enable JIT support in pcregrep. */
-/* #undef SUPPORT_PCREGREP_JIT */
+#define SUPPORT_PCREGREP_JIT
 
-/* Define to enable support for Unicode properties */
+/* Define to any value to enable support for Unicode properties. */
 /* #undef SUPPORT_UCP */
 
 /* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
    This will work even in an EBCDIC environment, but it is incompatible with
    the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
    ASCII/UTF-8/16/32, but not both at once. */
-/* #undef SUPPORT_UTF8 */
+/* #undef SUPPORT_UTF */
 
-/* Valgrind support to find invalid memory reads. */
+/* Define to any value for valgrind support to find invalid memory reads. */
 /* #undef SUPPORT_VALGRIND */
 
 /* Version number of package */
-#ifndef VERSION
-#define VERSION "8.34"
-#endif
+#define VERSION "8.35"
 
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
index 1fdc8e0f23182e1573f1343edf3c8a1aec0b008d..4ec32883cf0f12aafa6978f4c99a61fc47c6253d 100644 (file)
@@ -43,9 +43,7 @@ character tables for PCRE. The tables are built according to the current
 locale. Now that pcre_maketables is a function visible to the outside world, we
 make use of its code from here in order to be consistent. */
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include <ctype.h>
 #include <stdio.h>
index 9d69515c3b803f17b5c64971ef904ba268357cc4..14cbb8bf2be4aff7f5e2adf2cb253b2fc02e40e9 100644 (file)
@@ -130,9 +130,11 @@ USER DOCUMENTATION
        The  user  documentation  for PCRE comprises a number of different sec-
        tions. In the "man" format, each of these is a separate "man page".  In
        the  HTML  format, each is a separate page, linked from the index page.
-       In the plain text format, all the sections, except  the  pcredemo  sec-
-       tion, are concatenated, for ease of searching. The sections are as fol-
-       lows:
+       In the plain text format, the descriptions of the pcregrep and pcretest
+       programs  are  in  files  called pcregrep.txt and pcretest.txt, respec-
+       tively. The remaining sections, except for the pcredemo section  (which
+       is  a  program  listing),  are  concatenated  in  pcre.txt, for ease of
+       searching. The sections are as follows:
 
          pcre              this document
          pcre-config       show PCRE installation configuration information
@@ -160,8 +162,8 @@ USER DOCUMENTATION
          pcretest          description of the pcretest testing command
          pcreunicode       discussion of Unicode and UTF-8/16/32 support
 
-       In addition, in the "man" and HTML formats, there is a short  page  for
-       each C library function, listing its arguments and results.
+       In the "man" and HTML formats, there is also a short page  for  each  C
+       library function, listing its arguments and results.
 
 
 AUTHOR
@@ -177,8 +179,8 @@ AUTHOR
 
 REVISION
 
-       Last updated: 13 May 2013
-       Copyright (c) 1997-2013 University of Cambridge.
+       Last updated: 08 January 2014
+       Copyright (c) 1997-2014 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@@ -1674,6 +1676,8 @@ PCRE NATIVE API INDIRECTED FUNCTIONS
 
        int (*pcre_callout)(pcre_callout_block *);
 
+       int (*pcre_stack_guard)(void);
+
 
 PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
 
@@ -1809,6 +1813,14 @@ PCRE API OVERVIEW
        specified  points during a matching operation. Details are given in the
        pcrecallout documentation.
 
+       The global variable pcre_stack_guard initially contains NULL. It can be
+       set  by  the  caller  to  a function that is called by PCRE whenever it
+       starts to compile a parenthesized part of a pattern.  When  parentheses
+       are nested, PCRE uses recursive function calls, which use up the system
+       stack. This function is provided so that applications  with  restricted
+       stacks  can  force a compilation error if the stack runs out. The func-
+       tion should return zero if all is well, or non-zero to force an error.
+
 
 NEWLINES
 
@@ -1849,25 +1861,26 @@ MULTITHREADING
        The  PCRE  functions  can be used in multi-threading applications, with
        the  proviso  that  the  memory  management  functions  pointed  to  by
        pcre_malloc, pcre_free, pcre_stack_malloc, and pcre_stack_free, and the
-       callout function pointed to by pcre_callout, are shared by all threads.
+       callout and stack-checking functions pointed  to  by  pcre_callout  and
+       pcre_stack_guard, are shared by all threads.
 
-       The compiled form of a regular expression is not altered during  match-
+       The  compiled form of a regular expression is not altered during match-
        ing, so the same compiled pattern can safely be used by several threads
        at once.
 
-       If the just-in-time optimization feature is being used, it needs  sepa-
-       rate  memory stack areas for each thread. See the pcrejit documentation
+       If  the just-in-time optimization feature is being used, it needs sepa-
+       rate memory stack areas for each thread. See the pcrejit  documentation
        for more details.
 
 
 SAVING PRECOMPILED PATTERNS FOR LATER USE
 
        The compiled form of a regular expression can be saved and re-used at a
-       later  time,  possibly by a different program, and even on a host other
-       than the one on which  it  was  compiled.  Details  are  given  in  the
-       pcreprecompile  documentation,  which  includes  a  description  of the
-       pcre_pattern_to_host_byte_order() function. However, compiling a  regu-
-       lar  expression  with one version of PCRE for use with a different ver-
+       later time, possibly by a different program, and even on a  host  other
+       than  the  one  on  which  it  was  compiled.  Details are given in the
+       pcreprecompile documentation,  which  includes  a  description  of  the
+       pcre_pattern_to_host_byte_order()  function. However, compiling a regu-
+       lar expression with one version of PCRE for use with a  different  ver-
        sion is not guaranteed to work and may cause crashes.
 
 
@@ -1875,45 +1888,45 @@ CHECKING BUILD-TIME OPTIONS
 
        int pcre_config(int what, void *where);
 
-       The function pcre_config() makes it possible for a PCRE client to  dis-
+       The  function pcre_config() makes it possible for a PCRE client to dis-
        cover which optional features have been compiled into the PCRE library.
-       The pcrebuild documentation has more details about these optional  fea-
+       The  pcrebuild documentation has more details about these optional fea-
        tures.
 
-       The  first  argument  for pcre_config() is an integer, specifying which
+       The first argument for pcre_config() is an  integer,  specifying  which
        information is required; the second argument is a pointer to a variable
-       into  which  the  information  is placed. The returned value is zero on
-       success, or the negative error code PCRE_ERROR_BADOPTION if  the  value
-       in  the  first argument is not recognized. The following information is
+       into which the information is placed. The returned  value  is  zero  on
+       success,  or  the negative error code PCRE_ERROR_BADOPTION if the value
+       in the first argument is not recognized. The following  information  is
        available:
 
          PCRE_CONFIG_UTF8
 
-       The output is an integer that is set to one if UTF-8 support is  avail-
-       able;  otherwise it is set to zero. This value should normally be given
+       The  output is an integer that is set to one if UTF-8 support is avail-
+       able; otherwise it is set to zero. This value should normally be  given
        to the 8-bit version of this function, pcre_config(). If it is given to
-       the   16-bit  or  32-bit  version  of  this  function,  the  result  is
+       the  16-bit  or  32-bit  version  of  this  function,  the  result   is
        PCRE_ERROR_BADOPTION.
 
          PCRE_CONFIG_UTF16
 
        The output is an integer that is set to one if UTF-16 support is avail-
-       able;  otherwise it is set to zero. This value should normally be given
+       able; otherwise it is set to zero. This value should normally be  given
        to the 16-bit version of this function, pcre16_config(). If it is given
-       to  the  8-bit  or  32-bit  version  of  this  function,  the result is
+       to the 8-bit  or  32-bit  version  of  this  function,  the  result  is
        PCRE_ERROR_BADOPTION.
 
          PCRE_CONFIG_UTF32
 
        The output is an integer that is set to one if UTF-32 support is avail-
-       able;  otherwise it is set to zero. This value should normally be given
+       able; otherwise it is set to zero. This value should normally be  given
        to the 32-bit version of this function, pcre32_config(). If it is given
-       to  the  8-bit  or  16-bit  version  of  this  function,  the result is
+       to the 8-bit  or  16-bit  version  of  this  function,  the  result  is
        PCRE_ERROR_BADOPTION.
 
          PCRE_CONFIG_UNICODE_PROPERTIES
 
-       The output is an integer that is set to  one  if  support  for  Unicode
+       The  output  is  an  integer  that is set to one if support for Unicode
        character properties is available; otherwise it is set to zero.
 
          PCRE_CONFIG_JIT
@@ -1923,55 +1936,58 @@ CHECKING BUILD-TIME OPTIONS
 
          PCRE_CONFIG_JITTARGET
 
-       The output is a pointer to a zero-terminated "const char *" string.  If
+       The  output is a pointer to a zero-terminated "const char *" string. If
        JIT support is available, the string contains the name of the architec-
-       ture for which the JIT compiler is configured, for example  "x86  32bit
-       (little  endian  +  unaligned)".  If  JIT support is not available, the
+       ture  for  which the JIT compiler is configured, for example "x86 32bit
+       (little endian + unaligned)". If JIT  support  is  not  available,  the
        result is NULL.
 
          PCRE_CONFIG_NEWLINE
 
-       The output is an integer whose value specifies  the  default  character
-       sequence  that  is recognized as meaning "newline". The values that are
+       The  output  is  an integer whose value specifies the default character
+       sequence that is recognized as meaning "newline". The values  that  are
        supported in ASCII/Unicode environments are: 10 for LF, 13 for CR, 3338
-       for  CRLF,  -2 for ANYCRLF, and -1 for ANY. In EBCDIC environments, CR,
-       ANYCRLF, and ANY yield the same values. However, the value  for  LF  is
-       normally  21, though some EBCDIC environments use 37. The corresponding
-       values for CRLF are 3349 and 3365. The default should  normally  corre-
+       for CRLF, -2 for ANYCRLF, and -1 for ANY. In EBCDIC  environments,  CR,
+       ANYCRLF,  and  ANY  yield the same values. However, the value for LF is
+       normally 21, though some EBCDIC environments use 37. The  corresponding
+       values  for  CRLF are 3349 and 3365. The default should normally corre-
        spond to the standard sequence for your operating system.
 
          PCRE_CONFIG_BSR
 
        The output is an integer whose value indicates what character sequences
-       the \R escape sequence matches by default. A value of 0 means  that  \R
-       matches  any  Unicode  line ending sequence; a value of 1 means that \R
+       the  \R  escape sequence matches by default. A value of 0 means that \R
+       matches any Unicode line ending sequence; a value of 1  means  that  \R
        matches only CR, LF, or CRLF. The default can be overridden when a pat-
        tern is compiled or matched.
 
          PCRE_CONFIG_LINK_SIZE
 
-       The  output  is  an  integer that contains the number of bytes used for
+       The output is an integer that contains the number  of  bytes  used  for
        internal  linkage  in  compiled  regular  expressions.  For  the  8-bit
        library, the value can be 2, 3, or 4. For the 16-bit library, the value
-       is either 2 or 4 and is  still  a  number  of  bytes.  For  the  32-bit
+       is  either  2  or  4  and  is  still  a number of bytes. For the 32-bit
        library, the value is either 2 or 4 and is still a number of bytes. The
        default value of 2 is sufficient for all but the most massive patterns,
-       since  it  allows  the compiled pattern to be up to 64K in size. Larger
-       values allow larger regular expressions to be compiled, at the  expense
+       since it allows the compiled pattern to be up to 64K  in  size.  Larger
+       values  allow larger regular expressions to be compiled, at the expense
        of slower matching.
 
          PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
 
-       The  output  is  an integer that contains the threshold above which the
-       POSIX interface uses malloc() for output vectors. Further  details  are
+       The output is an integer that contains the threshold  above  which  the
+       POSIX  interface  uses malloc() for output vectors. Further details are
        given in the pcreposix documentation.
 
          PCRE_CONFIG_PARENS_LIMIT
 
        The output is a long integer that gives the maximum depth of nesting of
-       parentheses (of any kind) in a pattern. This limit is  imposed  to  cap
+       parentheses  (of  any  kind) in a pattern. This limit is imposed to cap
        the amount of system stack used when a pattern is compiled. It is spec-
-       ified when PCRE is built; the default is 250.
+       ified  when PCRE is built; the default is 250. This limit does not take
+       into account the stack that may already be used by the calling applica-
+       tion.  For  finer  control  over compilation stack usage, you can set a
+       pointer to an external checking function in pcre_stack_guard.
 
          PCRE_CONFIG_MATCH_LIMIT
 
@@ -2474,6 +2490,8 @@ COMPILATION ERROR CODES
          81  missing opening brace after \o
          82  parentheses are too deeply nested
          83  invalid range in character class
+         84  group name must start with a non-digit
+         85  parentheses are too deeply nested (stack check)
 
        The  numbers  32  and 10000 in errors 48 and 49 are defaults; different
        values may be used if the limits were changed when PCRE was built.
@@ -2714,12 +2732,16 @@ INFORMATION ABOUT A PATTERN
        tion. External callers can cause PCRE to use  its  internal  tables  by
        passing a NULL table pointer.
 
-         PCRE_INFO_FIRSTBYTE
+         PCRE_INFO_FIRSTBYTE (deprecated)
 
        Return information about the first data unit of any matched string, for
-       a non-anchored pattern. (The name of this option refers  to  the  8-bit
-       library,  where data units are bytes.) The fourth argument should point
-       to an int variable.
+       a non-anchored pattern. The name of this option  refers  to  the  8-bit
+       library,  where  data units are bytes. The fourth argument should point
+       to an int variable. Negative values are used for  special  cases.  How-
+       ever,  this  means  that when the 32-bit library is in non-UTF-32 mode,
+       the full 32-bit range of characters cannot be returned. For  this  rea-
+       son,  this  value  is deprecated; use PCRE_INFO_FIRSTCHARACTERFLAGS and
+       PCRE_INFO_FIRSTCHARACTER instead.
 
        If there is a fixed first value, for example, the  letter  "c"  from  a
        pattern  such  as (cat|cow|coyote), its value is returned. In the 8-bit
@@ -2739,10 +2761,38 @@ INFORMATION ABOUT A PATTERN
        of  a  subject string or after any newline within the string. Otherwise
        -2 is returned. For anchored patterns, -2 is returned.
 
-       Since for the 32-bit library using the non-UTF-32 mode,  this  function
-       is  unable to return the full 32-bit range of the character, this value
-       is   deprecated;   instead   the   PCRE_INFO_FIRSTCHARACTERFLAGS    and
-       PCRE_INFO_FIRSTCHARACTER values should be used.
+         PCRE_INFO_FIRSTCHARACTER
+
+       Return the value of the first data  unit  (non-UTF  character)  of  any
+       matched  string  in  the  situation where PCRE_INFO_FIRSTCHARACTERFLAGS
+       returns 1; otherwise return 0. The fourth argument should point  to  an
+       uint32_t variable.
+
+       In  the 8-bit library, the value is always less than 256. In the 16-bit
+       library the value can be up to 0xffff. In the 32-bit library in  UTF-32
+       mode  the  value  can  be up to 0x10ffff, and up to 0xffffffff when not
+       using UTF-32 mode.
+
+         PCRE_INFO_FIRSTCHARACTERFLAGS
+
+       Return information about the first data unit of any matched string, for
+       a  non-anchored  pattern.  The  fourth  argument should point to an int
+       variable.
+
+       If there is a fixed first value, for example, the  letter  "c"  from  a
+       pattern  such  as  (cat|cow|coyote),  1  is returned, and the character
+       value can be retrieved using PCRE_INFO_FIRSTCHARACTER. If there  is  no
+       fixed first value, and if either
+
+       (a)  the pattern was compiled with the PCRE_MULTILINE option, and every
+       branch starts with "^", or
+
+       (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not
+       set (if it were set, the pattern would be anchored),
+
+       2 is returned, indicating that the pattern matches only at the start of
+       a subject string or after any newline within the string. Otherwise 0 is
+       returned. For anchored patterns, 0 is returned.
 
          PCRE_INFO_FIRSTTABLE
 
@@ -2954,57 +3004,24 @@ INFORMATION ABOUT A PATTERN
        option so that it can be saved and  restored  (see  the  pcreprecompile
        documentation for details).
 
-         PCRE_INFO_FIRSTCHARACTERFLAGS
-
-       Return information about the first data unit of any matched string, for
-       a non-anchored pattern. The fourth argument  should  point  to  an  int
-       variable.
-
-       If  there  is  a  fixed first value, for example, the letter "c" from a
-       pattern such as (cat|cow|coyote), 1  is  returned,  and  the  character
-       value can be retrieved using PCRE_INFO_FIRSTCHARACTER.
-
-       If there is no fixed first value, and if either
-
-       (a)  the pattern was compiled with the PCRE_MULTILINE option, and every
-       branch starts with "^", or
-
-       (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not
-       set (if it were set, the pattern would be anchored),
-
-       2 is returned, indicating that the pattern matches only at the start of
-       a subject string or after any newline within the string. Otherwise 0 is
-       returned. For anchored patterns, 0 is returned.
-
-         PCRE_INFO_FIRSTCHARACTER
-
-       Return   the  fixed  first  character  value  in  the  situation  where
-       PCRE_INFO_FIRSTCHARACTERFLAGS returns 1; otherwise return 0. The fourth
-       argument should point to an uint_t variable.
-
-       In  the 8-bit library, the value is always less than 256. In the 16-bit
-       library the value can be up to 0xffff. In the 32-bit library in  UTF-32
-       mode  the  value  can  be up to 0x10ffff, and up to 0xffffffff when not
-       using UTF-32 mode.
-
          PCRE_INFO_REQUIREDCHARFLAGS
 
-       Returns 1 if there is a rightmost literal data unit that must exist  in
+       Returns  1 if there is a rightmost literal data unit that must exist in
        any matched string, other than at its start. The fourth argument should
-       point to an int variable. If there is no such value, 0 is returned.  If
+       point  to an int variable. If there is no such value, 0 is returned. If
        returning  1,  the  character  value  itself  can  be  retrieved  using
        PCRE_INFO_REQUIREDCHAR.
 
        For anchored patterns, a last literal value is recorded only if it fol-
-       lows  something  of  variable  length.  For  example,  for  the pattern
-       /^a\d+z\d+/  the   returned   value   1   (with   "z"   returned   from
+       lows something  of  variable  length.  For  example,  for  the  pattern
+       /^a\d+z\d+/  the  returned  value  is  1   (with   "z"   returned   from
        PCRE_INFO_REQUIREDCHAR), but for /^a\dz\d/ the returned value is 0.
 
          PCRE_INFO_REQUIREDCHAR
 
-       Return  the value of the rightmost literal data unit that must exist in
-       any matched string, other than at its start, if such a value  has  been
-       recorded.  The fourth argument should point to an uint32_t variable. If
+       Return the value of the rightmost literal data unit that must exist  in
+       any  matched  string, other than at its start, if such a value has been
+       recorded. The fourth argument should point to an uint32_t variable.  If
        there is no such value, 0 is returned.
 
 
@@ -3012,21 +3029,21 @@ REFERENCE COUNTS
 
        int pcre_refcount(pcre *code, int adjust);
 
-       The pcre_refcount() function is used to maintain a reference  count  in
+       The  pcre_refcount()  function is used to maintain a reference count in
        the data block that contains a compiled pattern. It is provided for the
-       benefit of applications that  operate  in  an  object-oriented  manner,
+       benefit  of  applications  that  operate  in an object-oriented manner,
        where different parts of the application may be using the same compiled
        pattern, but you want to free the block when they are all done.
 
        When a pattern is compiled, the reference count field is initialized to
-       zero.   It is changed only by calling this function, whose action is to
-       add the adjust value (which may be positive or  negative)  to  it.  The
+       zero.  It is changed only by calling this function, whose action is  to
+       add  the  adjust  value  (which may be positive or negative) to it. The
        yield of the function is the new value. However, the value of the count
-       is constrained to lie between 0 and 65535, inclusive. If the new  value
+       is  constrained to lie between 0 and 65535, inclusive. If the new value
        is outside these limits, it is forced to the appropriate limit value.
 
-       Except  when it is zero, the reference count is not correctly preserved
-       if a pattern is compiled on one host and then  transferred  to  a  host
+       Except when it is zero, the reference count is not correctly  preserved
+       if  a  pattern  is  compiled on one host and then transferred to a host
        whose byte-order is different. (This seems a highly unlikely scenario.)
 
 
@@ -3036,22 +3053,22 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
             const char *subject, int length, int startoffset,
             int options, int *ovector, int ovecsize);
 
-       The  function pcre_exec() is called to match a subject string against a
-       compiled pattern, which is passed in the code argument. If the  pattern
-       was  studied,  the  result  of  the study should be passed in the extra
-       argument. You can call pcre_exec() with the same code and  extra  argu-
-       ments  as  many  times as you like, in order to match different subject
+       The function pcre_exec() is called to match a subject string against  a
+       compiled  pattern, which is passed in the code argument. If the pattern
+       was studied, the result of the study should  be  passed  in  the  extra
+       argument.  You  can call pcre_exec() with the same code and extra argu-
+       ments as many times as you like, in order to  match  different  subject
        strings with the same pattern.
 
-       This function is the main matching facility  of  the  library,  and  it
-       operates  in  a  Perl-like  manner. For specialist use there is also an
-       alternative matching function, which is described below in the  section
+       This  function  is  the  main  matching facility of the library, and it
+       operates in a Perl-like manner. For specialist use  there  is  also  an
+       alternative  matching function, which is described below in the section
        about the pcre_dfa_exec() function.
 
-       In  most applications, the pattern will have been compiled (and option-
-       ally studied) in the same process that calls pcre_exec().  However,  it
+       In most applications, the pattern will have been compiled (and  option-
+       ally  studied)  in the same process that calls pcre_exec(). However, it
        is possible to save compiled patterns and study data, and then use them
-       later in different processes, possibly even on different hosts.  For  a
+       later  in  different processes, possibly even on different hosts. For a
        discussion about this, see the pcreprecompile documentation.
 
        Here is an example of a simple call to pcre_exec():
@@ -3070,10 +3087,10 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
 
    Extra data for pcre_exec()
 
-       If  the  extra argument is not NULL, it must point to a pcre_extra data
-       block. The pcre_study() function returns such a block (when it  doesn't
-       return  NULL), but you can also create one for yourself, and pass addi-
-       tional information in it. The pcre_extra block contains  the  following
+       If the extra argument is not NULL, it must point to a  pcre_extra  data
+       block.  The pcre_study() function returns such a block (when it doesn't
+       return NULL), but you can also create one for yourself, and pass  addi-
+       tional  information  in it. The pcre_extra block contains the following
        fields (not necessarily in this order):
 
          unsigned long int flags;
@@ -3085,13 +3102,13 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
          const unsigned char *tables;
          unsigned char **mark;
 
-       In  the  16-bit  version  of  this  structure,  the mark field has type
+       In the 16-bit version of  this  structure,  the  mark  field  has  type
        "PCRE_UCHAR16 **".
 
-       In the 32-bit version of  this  structure,  the  mark  field  has  type
+       In  the  32-bit  version  of  this  structure,  the mark field has type
        "PCRE_UCHAR32 **".
 
-       The  flags  field is used to specify which of the other fields are set.
+       The flags field is used to specify which of the other fields  are  set.
        The flag bits are:
 
          PCRE_EXTRA_CALLOUT_DATA
@@ -3102,134 +3119,134 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
          PCRE_EXTRA_STUDY_DATA
          PCRE_EXTRA_TABLES
 
-       Other flag bits should be set to zero. The study_data field  and  some-
-       times  the executable_jit field are set in the pcre_extra block that is
-       returned by pcre_study(), together with the appropriate flag bits.  You
-       should  not set these yourself, but you may add to the block by setting
+       Other  flag  bits should be set to zero. The study_data field and some-
+       times the executable_jit field are set in the pcre_extra block that  is
+       returned  by pcre_study(), together with the appropriate flag bits. You
+       should not set these yourself, but you may add to the block by  setting
        other fields and their corresponding flag bits.
 
        The match_limit field provides a means of preventing PCRE from using up
-       a  vast amount of resources when running patterns that are not going to
-       match, but which have a very large number  of  possibilities  in  their
-       search  trees. The classic example is a pattern that uses nested unlim-
+       a vast amount of resources when running patterns that are not going  to
+       match,  but  which  have  a very large number of possibilities in their
+       search trees. The classic example is a pattern that uses nested  unlim-
        ited repeats.
 
-       Internally, pcre_exec() uses a function called match(), which it  calls
-       repeatedly  (sometimes  recursively).  The  limit set by match_limit is
-       imposed on the number of times this function is called during a  match,
-       which  has  the  effect of limiting the amount of backtracking that can
+       Internally,  pcre_exec() uses a function called match(), which it calls
+       repeatedly (sometimes recursively). The limit  set  by  match_limit  is
+       imposed  on the number of times this function is called during a match,
+       which has the effect of limiting the amount of  backtracking  that  can
        take place. For patterns that are not anchored, the count restarts from
        zero for each position in the subject string.
 
        When pcre_exec() is called with a pattern that was successfully studied
-       with a JIT option, the way that the matching is  executed  is  entirely
+       with  a  JIT  option, the way that the matching is executed is entirely
        different.  However, there is still the possibility of runaway matching
        that goes on for a very long time, and so the match_limit value is also
        used in this case (but in a different way) to limit how long the match-
        ing can continue.
 
-       The default value for the limit can be set  when  PCRE  is  built;  the
-       default  default  is 10 million, which handles all but the most extreme
-       cases. You can override the default  by  suppling  pcre_exec()  with  a
-       pcre_extra     block    in    which    match_limit    is    set,    and
-       PCRE_EXTRA_MATCH_LIMIT is set in the  flags  field.  If  the  limit  is
+       The  default  value  for  the  limit can be set when PCRE is built; the
+       default default is 10 million, which handles all but the  most  extreme
+       cases.  You  can  override  the default by supplying pcre_exec() with a
+       pcre_extra    block    in    which    match_limit    is    set,     and
+       PCRE_EXTRA_MATCH_LIMIT  is  set  in  the  flags  field. If the limit is
        exceeded, pcre_exec() returns PCRE_ERROR_MATCHLIMIT.
 
-       A  value  for  the  match  limit may also be supplied by an item at the
+       A value for the match limit may also be supplied  by  an  item  at  the
        start of a pattern of the form
 
          (*LIMIT_MATCH=d)
 
-       where d is a decimal number. However, such a setting is ignored  unless
-       d  is  less  than  the limit set by the caller of pcre_exec() or, if no
+       where  d is a decimal number. However, such a setting is ignored unless
+       d is less than the limit set by the caller of  pcre_exec()  or,  if  no
        such limit is set, less than the default.
 
-       The match_limit_recursion field is similar to match_limit, but  instead
+       The  match_limit_recursion field is similar to match_limit, but instead
        of limiting the total number of times that match() is called, it limits
-       the depth of recursion. The recursion depth is a  smaller  number  than
-       the  total number of calls, because not all calls to match() are recur-
+       the  depth  of  recursion. The recursion depth is a smaller number than
+       the total number of calls, because not all calls to match() are  recur-
        sive.  This limit is of use only if it is set smaller than match_limit.
 
-       Limiting the recursion depth limits the amount of  machine  stack  that
-       can  be used, or, when PCRE has been compiled to use memory on the heap
-       instead of the stack, the amount of heap memory that can be used.  This
-       limit  is not relevant, and is ignored, when matching is done using JIT
+       Limiting  the  recursion  depth limits the amount of machine stack that
+       can be used, or, when PCRE has been compiled to use memory on the  heap
+       instead  of the stack, the amount of heap memory that can be used. This
+       limit is not relevant, and is ignored, when matching is done using  JIT
        compiled code.
 
-       The default value for match_limit_recursion can be  set  when  PCRE  is
-       built;  the  default  default  is  the  same  value  as the default for
-       match_limit. You can override the default by suppling pcre_exec()  with
-       a   pcre_extra   block  in  which  match_limit_recursion  is  set,  and
-       PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in  the  flags  field.  If  the
+       The  default  value  for  match_limit_recursion can be set when PCRE is
+       built; the default default  is  the  same  value  as  the  default  for
+       match_limit. You can override the default by supplying pcre_exec() with
+       a  pcre_extra  block  in  which  match_limit_recursion  is   set,   and
+       PCRE_EXTRA_MATCH_LIMIT_RECURSION  is  set  in  the  flags field. If the
        limit is exceeded, pcre_exec() returns PCRE_ERROR_RECURSIONLIMIT.
 
-       A  value for the recursion limit may also be supplied by an item at the
+       A value for the recursion limit may also be supplied by an item at  the
        start of a pattern of the form
 
          (*LIMIT_RECURSION=d)
 
-       where d is a decimal number. However, such a setting is ignored  unless
-       d  is  less  than  the limit set by the caller of pcre_exec() or, if no
+       where  d is a decimal number. However, such a setting is ignored unless
+       d is less than the limit set by the caller of  pcre_exec()  or,  if  no
        such limit is set, less than the default.
 
-       The callout_data field is used in conjunction with the  "callout"  fea-
+       The  callout_data  field is used in conjunction with the "callout" fea-
        ture, and is described in the pcrecallout documentation.
 
-       The  tables field is provided for use with patterns that have been pre-
+       The tables field is provided for use with patterns that have been  pre-
        compiled using custom character tables, saved to disc or elsewhere, and
-       then  reloaded,  because the tables that were used to compile a pattern
-       are not saved with it. See the pcreprecompile documentation for a  dis-
-       cussion  of  saving  compiled patterns for later use. If NULL is passed
+       then reloaded, because the tables that were used to compile  a  pattern
+       are  not saved with it. See the pcreprecompile documentation for a dis-
+       cussion of saving compiled patterns for later use. If  NULL  is  passed
        using this mechanism, it forces PCRE's internal tables to be used.
 
-       Warning: The tables that pcre_exec() uses must be  the  same  as  those
-       that  were used when the pattern was compiled. If this is not the case,
+       Warning:  The  tables  that  pcre_exec() uses must be the same as those
+       that were used when the pattern was compiled. If this is not the  case,
        the behaviour of pcre_exec() is undefined. Therefore, when a pattern is
-       compiled  and  matched  in the same process, this field should never be
+       compiled and matched in the same process, this field  should  never  be
        set. In this (the most common) case, the correct table pointer is auto-
-       matically  passed  with  the  compiled  pattern  from pcre_compile() to
+       matically passed with  the  compiled  pattern  from  pcre_compile()  to
        pcre_exec().
 
-       If PCRE_EXTRA_MARK is set in the flags field, the mark  field  must  be
-       set  to point to a suitable variable. If the pattern contains any back-
-       tracking control verbs such as (*MARK:NAME), and the execution ends  up
-       with  a  name  to  pass back, a pointer to the name string (zero termi-
-       nated) is placed in the variable pointed to  by  the  mark  field.  The
-       names  are  within  the  compiled pattern; if you wish to retain such a
-       name you must copy it before freeing the memory of a compiled  pattern.
-       If  there  is no name to pass back, the variable pointed to by the mark
-       field is set to NULL. For details of the  backtracking  control  verbs,
+       If  PCRE_EXTRA_MARK  is  set in the flags field, the mark field must be
+       set to point to a suitable variable. If the pattern contains any  back-
+       tracking  control verbs such as (*MARK:NAME), and the execution ends up
+       with a name to pass back, a pointer to the  name  string  (zero  termi-
+       nated)  is  placed  in  the  variable pointed to by the mark field. The
+       names are within the compiled pattern; if you wish  to  retain  such  a
+       name  you must copy it before freeing the memory of a compiled pattern.
+       If there is no name to pass back, the variable pointed to by  the  mark
+       field  is  set  to NULL. For details of the backtracking control verbs,
        see the section entitled "Backtracking control" in the pcrepattern doc-
        umentation.
 
    Option bits for pcre_exec()
 
-       The unused bits of the options argument for pcre_exec() must  be  zero.
-       The  only  bits  that  may  be set are PCRE_ANCHORED, PCRE_NEWLINE_xxx,
-       PCRE_NOTBOL,   PCRE_NOTEOL,    PCRE_NOTEMPTY,    PCRE_NOTEMPTY_ATSTART,
-       PCRE_NO_START_OPTIMIZE,   PCRE_NO_UTF8_CHECK,   PCRE_PARTIAL_HARD,  and
+       The  unused  bits of the options argument for pcre_exec() must be zero.
+       The only bits that may  be  set  are  PCRE_ANCHORED,  PCRE_NEWLINE_xxx,
+       PCRE_NOTBOL,    PCRE_NOTEOL,    PCRE_NOTEMPTY,   PCRE_NOTEMPTY_ATSTART,
+       PCRE_NO_START_OPTIMIZE,  PCRE_NO_UTF8_CHECK,   PCRE_PARTIAL_HARD,   and
        PCRE_PARTIAL_SOFT.
 
-       If the pattern was successfully studied with one  of  the  just-in-time
+       If  the  pattern  was successfully studied with one of the just-in-time
        (JIT) compile options, the only supported options for JIT execution are
-       PCRE_NO_UTF8_CHECK,    PCRE_NOTBOL,     PCRE_NOTEOL,     PCRE_NOTEMPTY,
-       PCRE_NOTEMPTY_ATSTART,  PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT. If an
-       unsupported option is used, JIT execution is disabled  and  the  normal
+       PCRE_NO_UTF8_CHECK,     PCRE_NOTBOL,     PCRE_NOTEOL,    PCRE_NOTEMPTY,
+       PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and PCRE_PARTIAL_SOFT. If  an
+       unsupported  option  is  used, JIT execution is disabled and the normal
        interpretive code in pcre_exec() is run.
 
          PCRE_ANCHORED
 
-       The  PCRE_ANCHORED  option  limits pcre_exec() to matching at the first
-       matching position. If a pattern was  compiled  with  PCRE_ANCHORED,  or
-       turned  out to be anchored by virtue of its contents, it cannot be made
+       The PCRE_ANCHORED option limits pcre_exec() to matching  at  the  first
+       matching  position.  If  a  pattern was compiled with PCRE_ANCHORED, or
+       turned out to be anchored by virtue of its contents, it cannot be  made
        unanchored at matching time.
 
          PCRE_BSR_ANYCRLF
          PCRE_BSR_UNICODE
 
        These options (which are mutually exclusive) control what the \R escape
-       sequence  matches.  The choice is either to match only CR, LF, or CRLF,
-       or to match any Unicode newline sequence. These  options  override  the
+       sequence matches. The choice is either to match only CR, LF,  or  CRLF,
+       or  to  match  any Unicode newline sequence. These options override the
        choice that was made or defaulted when the pattern was compiled.
 
          PCRE_NEWLINE_CR
@@ -3238,345 +3255,345 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
          PCRE_NEWLINE_ANYCRLF
          PCRE_NEWLINE_ANY
 
-       These  options  override  the  newline  definition  that  was chosen or
-       defaulted when the pattern was compiled. For details, see the  descrip-
-       tion  of  pcre_compile()  above.  During  matching,  the newline choice
-       affects the behaviour of the dot, circumflex,  and  dollar  metacharac-
-       ters.  It may also alter the way the match position is advanced after a
+       These options override  the  newline  definition  that  was  chosen  or
+       defaulted  when the pattern was compiled. For details, see the descrip-
+       tion of pcre_compile()  above.  During  matching,  the  newline  choice
+       affects  the  behaviour  of the dot, circumflex, and dollar metacharac-
+       ters. It may also alter the way the match position is advanced after  a
        match failure for an unanchored pattern.
 
-       When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF,  or  PCRE_NEWLINE_ANY  is
-       set,  and a match attempt for an unanchored pattern fails when the cur-
-       rent position is at a  CRLF  sequence,  and  the  pattern  contains  no
-       explicit  matches  for  CR  or  LF  characters,  the  match position is
+       When  PCRE_NEWLINE_CRLF,  PCRE_NEWLINE_ANYCRLF,  or PCRE_NEWLINE_ANY is
+       set, and a match attempt for an unanchored pattern fails when the  cur-
+       rent  position  is  at  a  CRLF  sequence,  and the pattern contains no
+       explicit matches for  CR  or  LF  characters,  the  match  position  is
        advanced by two characters instead of one, in other words, to after the
        CRLF.
 
        The above rule is a compromise that makes the most common cases work as
-       expected. For example, if the  pattern  is  .+A  (and  the  PCRE_DOTALL
+       expected.  For  example,  if  the  pattern  is .+A (and the PCRE_DOTALL
        option is not set), it does not match the string "\r\nA" because, after
-       failing at the start, it skips both the CR and the LF before  retrying.
-       However,  the  pattern  [\r\n]A does match that string, because it con-
+       failing  at the start, it skips both the CR and the LF before retrying.
+       However, the pattern [\r\n]A does match that string,  because  it  con-
        tains an explicit CR or LF reference, and so advances only by one char-
        acter after the first failure.
 
        An explicit match for CR or LF is either a literal appearance of one of
-       those characters, or one of the \r or  \n  escape  sequences.  Implicit
-       matches  such  as [^X] do not count, nor does \s (which includes CR and
+       those  characters,  or  one  of the \r or \n escape sequences. Implicit
+       matches such as [^X] do not count, nor does \s (which includes  CR  and
        LF in the characters that it matches).
 
-       Notwithstanding the above, anomalous effects may still occur when  CRLF
+       Notwithstanding  the above, anomalous effects may still occur when CRLF
        is a valid newline sequence and explicit \r or \n escapes appear in the
        pattern.
 
          PCRE_NOTBOL
 
        This option specifies that first character of the subject string is not
-       the  beginning  of  a  line, so the circumflex metacharacter should not
-       match before it. Setting this without PCRE_MULTILINE (at compile  time)
-       causes  circumflex  never to match. This option affects only the behav-
+       the beginning of a line, so the  circumflex  metacharacter  should  not
+       match  before it. Setting this without PCRE_MULTILINE (at compile time)
+       causes circumflex never to match. This option affects only  the  behav-
        iour of the circumflex metacharacter. It does not affect \A.
 
          PCRE_NOTEOL
 
        This option specifies that the end of the subject string is not the end
-       of  a line, so the dollar metacharacter should not match it nor (except
-       in multiline mode) a newline immediately before it. Setting this  with-
+       of a line, so the dollar metacharacter should not match it nor  (except
+       in  multiline mode) a newline immediately before it. Setting this with-
        out PCRE_MULTILINE (at compile time) causes dollar never to match. This
-       option affects only the behaviour of the dollar metacharacter. It  does
+       option  affects only the behaviour of the dollar metacharacter. It does
        not affect \Z or \z.
 
          PCRE_NOTEMPTY
 
        An empty string is not considered to be a valid match if this option is
-       set. If there are alternatives in the pattern, they are tried.  If  all
-       the  alternatives  match  the empty string, the entire match fails. For
+       set.  If  there are alternatives in the pattern, they are tried. If all
+       the alternatives match the empty string, the entire  match  fails.  For
        example, if the pattern
 
          a?b?
 
-       is applied to a string not beginning with "a" or  "b",  it  matches  an
-       empty  string at the start of the subject. With PCRE_NOTEMPTY set, this
+       is  applied  to  a  string not beginning with "a" or "b", it matches an
+       empty string at the start of the subject. With PCRE_NOTEMPTY set,  this
        match is not valid, so PCRE searches further into the string for occur-
        rences of "a" or "b".
 
          PCRE_NOTEMPTY_ATSTART
 
-       This  is  like PCRE_NOTEMPTY, except that an empty string match that is
-       not at the start of  the  subject  is  permitted.  If  the  pattern  is
+       This is like PCRE_NOTEMPTY, except that an empty string match  that  is
+       not  at  the  start  of  the  subject  is  permitted. If the pattern is
        anchored, such a match can occur only if the pattern contains \K.
 
-       Perl     has    no    direct    equivalent    of    PCRE_NOTEMPTY    or
-       PCRE_NOTEMPTY_ATSTART, but it does make a special  case  of  a  pattern
-       match  of  the empty string within its split() function, and when using
-       the /g modifier. It is  possible  to  emulate  Perl's  behaviour  after
+       Perl    has    no    direct    equivalent    of    PCRE_NOTEMPTY     or
+       PCRE_NOTEMPTY_ATSTART,  but  it  does  make a special case of a pattern
+       match of the empty string within its split() function, and  when  using
+       the  /g  modifier.  It  is  possible  to emulate Perl's behaviour after
        matching a null string by first trying the match again at the same off-
-       set with PCRE_NOTEMPTY_ATSTART and  PCRE_ANCHORED,  and  then  if  that
+       set  with  PCRE_NOTEMPTY_ATSTART  and  PCRE_ANCHORED,  and then if that
        fails, by advancing the starting offset (see below) and trying an ordi-
-       nary match again. There is some code that demonstrates how to  do  this
-       in  the  pcredemo sample program. In the most general case, you have to
-       check to see if the newline convention recognizes CRLF  as  a  newline,
-       and  if so, and the current character is CR followed by LF, advance the
+       nary  match  again. There is some code that demonstrates how to do this
+       in the pcredemo sample program. In the most general case, you  have  to
+       check  to  see  if the newline convention recognizes CRLF as a newline,
+       and if so, and the current character is CR followed by LF, advance  the
        starting offset by two characters instead of one.
 
          PCRE_NO_START_OPTIMIZE
 
-       There are a number of optimizations that pcre_exec() uses at the  start
-       of  a  match,  in  order to speed up the process. For example, if it is
+       There  are a number of optimizations that pcre_exec() uses at the start
+       of a match, in order to speed up the process. For  example,  if  it  is
        known that an unanchored match must start with a specific character, it
-       searches  the  subject  for that character, and fails immediately if it
-       cannot find it, without actually running the  main  matching  function.
+       searches the subject for that character, and fails  immediately  if  it
+       cannot  find  it,  without actually running the main matching function.
        This means that a special item such as (*COMMIT) at the start of a pat-
-       tern is not considered until after a suitable starting  point  for  the
-       match  has been found. Also, when callouts or (*MARK) items are in use,
+       tern  is  not  considered until after a suitable starting point for the
+       match has been found. Also, when callouts or (*MARK) items are in  use,
        these "start-up" optimizations can cause them to be skipped if the pat-
        tern is never actually used. The start-up optimizations are in effect a
        pre-scan of the subject that takes place before the pattern is run.
 
-       The PCRE_NO_START_OPTIMIZE option disables the start-up  optimizations,
-       possibly  causing  performance  to  suffer,  but ensuring that in cases
-       where the result is "no match", the callouts do occur, and  that  items
+       The  PCRE_NO_START_OPTIMIZE option disables the start-up optimizations,
+       possibly causing performance to suffer,  but  ensuring  that  in  cases
+       where  the  result is "no match", the callouts do occur, and that items
        such as (*COMMIT) and (*MARK) are considered at every possible starting
-       position in the subject string. If  PCRE_NO_START_OPTIMIZE  is  set  at
-       compile  time,  it  cannot  be  unset  at  matching  time.  The  use of
+       position  in  the  subject  string. If PCRE_NO_START_OPTIMIZE is set at
+       compile time,  it  cannot  be  unset  at  matching  time.  The  use  of
        PCRE_NO_START_OPTIMIZE  at  matching  time  (that  is,  passing  it  to
-       pcre_exec())  disables  JIT  execution;  in this situation, matching is
+       pcre_exec()) disables JIT execution; in  this  situation,  matching  is
        always done interpretively.
 
-       Setting PCRE_NO_START_OPTIMIZE can change the  outcome  of  a  matching
+       Setting  PCRE_NO_START_OPTIMIZE  can  change  the outcome of a matching
        operation.  Consider the pattern
 
          (*COMMIT)ABC
 
-       When  this  is  compiled, PCRE records the fact that a match must start
-       with the character "A". Suppose the subject  string  is  "DEFABC".  The
-       start-up  optimization  scans along the subject, finds "A" and runs the
-       first match attempt from there. The (*COMMIT) item means that the  pat-
-       tern  must  match the current starting position, which in this case, it
-       does. However, if the same match  is  run  with  PCRE_NO_START_OPTIMIZE
-       set,  the  initial  scan  along the subject string does not happen. The
-       first match attempt is run starting  from  "D"  and  when  this  fails,
-       (*COMMIT)  prevents  any  further  matches  being tried, so the overall
-       result is "no match". If the pattern is studied,  more  start-up  opti-
-       mizations  may  be  used. For example, a minimum length for the subject
+       When this is compiled, PCRE records the fact that a  match  must  start
+       with  the  character  "A".  Suppose the subject string is "DEFABC". The
+       start-up optimization scans along the subject, finds "A" and  runs  the
+       first  match attempt from there. The (*COMMIT) item means that the pat-
+       tern must match the current starting position, which in this  case,  it
+       does.  However,  if  the  same match is run with PCRE_NO_START_OPTIMIZE
+       set, the initial scan along the subject string  does  not  happen.  The
+       first  match  attempt  is  run  starting  from "D" and when this fails,
+       (*COMMIT) prevents any further matches  being  tried,  so  the  overall
+       result  is  "no  match". If the pattern is studied, more start-up opti-
+       mizations may be used. For example, a minimum length  for  the  subject
        may be recorded. Consider the pattern
 
          (*MARK:A)(X|Y)
 
-       The minimum length for a match is one  character.  If  the  subject  is
-       "ABC",  there  will  be  attempts  to  match "ABC", "BC", "C", and then
-       finally an empty string.  If the pattern is studied, the final  attempt
-       does  not take place, because PCRE knows that the subject is too short,
-       and so the (*MARK) is never encountered.  In this  case,  studying  the
-       pattern  does  not  affect the overall match result, which is still "no
+       The  minimum  length  for  a  match is one character. If the subject is
+       "ABC", there will be attempts to  match  "ABC",  "BC",  "C",  and  then
+       finally  an empty string.  If the pattern is studied, the final attempt
+       does not take place, because PCRE knows that the subject is too  short,
+       and  so  the  (*MARK) is never encountered.  In this case, studying the
+       pattern does not affect the overall match result, which  is  still  "no
        match", but it does affect the auxiliary information that is returned.
 
          PCRE_NO_UTF8_CHECK
 
        When PCRE_UTF8 is set at compile time, the validity of the subject as a
-       UTF-8  string is automatically checked when pcre_exec() is subsequently
+       UTF-8 string is automatically checked when pcre_exec() is  subsequently
        called.  The entire string is checked before any other processing takes
-       place.  The  value  of  startoffset  is  also checked to ensure that it
-       points to the start of a UTF-8 character. There is a  discussion  about
-       the  validity  of  UTF-8 strings in the pcreunicode page. If an invalid
-       sequence  of  bytes   is   found,   pcre_exec()   returns   the   error
+       place. The value of startoffset is  also  checked  to  ensure  that  it
+       points  to  the start of a UTF-8 character. There is a discussion about
+       the validity of UTF-8 strings in the pcreunicode page.  If  an  invalid
+       sequence   of   bytes   is   found,   pcre_exec()   returns  the  error
        PCRE_ERROR_BADUTF8 or, if PCRE_PARTIAL_HARD is set and the problem is a
        truncated character at the end of the subject, PCRE_ERROR_SHORTUTF8. In
-       both  cases, information about the precise nature of the error may also
-       be returned (see the descriptions of these errors in the section  enti-
-       tled  Error return values from pcre_exec() below).  If startoffset con-
+       both cases, information about the precise nature of the error may  also
+       be  returned (see the descriptions of these errors in the section enti-
+       tled Error return values from pcre_exec() below).  If startoffset  con-
        tains a value that does not point to the start of a UTF-8 character (or
        to the end of the subject), PCRE_ERROR_BADUTF8_OFFSET is returned.
 
-       If  you  already  know that your subject is valid, and you want to skip
-       these   checks   for   performance   reasons,   you   can    set    the
-       PCRE_NO_UTF8_CHECK  option  when calling pcre_exec(). You might want to
-       do this for the second and subsequent calls to pcre_exec() if  you  are
-       making  repeated  calls  to  find  all  the matches in a single subject
-       string. However, you should be  sure  that  the  value  of  startoffset
-       points  to  the  start of a character (or the end of the subject). When
+       If you already know that your subject is valid, and you  want  to  skip
+       these    checks    for   performance   reasons,   you   can   set   the
+       PCRE_NO_UTF8_CHECK option when calling pcre_exec(). You might  want  to
+       do  this  for the second and subsequent calls to pcre_exec() if you are
+       making repeated calls to find all  the  matches  in  a  single  subject
+       string.  However,  you  should  be  sure  that the value of startoffset
+       points to the start of a character (or the end of  the  subject).  When
        PCRE_NO_UTF8_CHECK is set, the effect of passing an invalid string as a
-       subject  or  an invalid value of startoffset is undefined. Your program
+       subject or an invalid value of startoffset is undefined.  Your  program
        may crash or loop.
 
          PCRE_PARTIAL_HARD
          PCRE_PARTIAL_SOFT
 
-       These options turn on the partial matching feature. For backwards  com-
-       patibility,  PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial
-       match occurs if the end of the subject string is reached  successfully,
-       but  there  are not enough subject characters to complete the match. If
+       These  options turn on the partial matching feature. For backwards com-
+       patibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A  partial
+       match  occurs if the end of the subject string is reached successfully,
+       but there are not enough subject characters to complete the  match.  If
        this happens when PCRE_PARTIAL_SOFT (but not PCRE_PARTIAL_HARD) is set,
-       matching  continues  by  testing any remaining alternatives. Only if no
-       complete match can be found is PCRE_ERROR_PARTIAL returned  instead  of
-       PCRE_ERROR_NOMATCH.  In  other  words,  PCRE_PARTIAL_SOFT says that the
-       caller is prepared to handle a partial match, but only if  no  complete
+       matching continues by testing any remaining alternatives.  Only  if  no
+       complete  match  can be found is PCRE_ERROR_PARTIAL returned instead of
+       PCRE_ERROR_NOMATCH. In other words,  PCRE_PARTIAL_SOFT  says  that  the
+       caller  is  prepared to handle a partial match, but only if no complete
        match can be found.
 
-       If  PCRE_PARTIAL_HARD  is  set, it overrides PCRE_PARTIAL_SOFT. In this
-       case, if a partial match  is  found,  pcre_exec()  immediately  returns
-       PCRE_ERROR_PARTIAL,  without  considering  any  other  alternatives. In
-       other words, when PCRE_PARTIAL_HARD is set, a partial match is  consid-
+       If PCRE_PARTIAL_HARD is set, it overrides  PCRE_PARTIAL_SOFT.  In  this
+       case,  if  a  partial  match  is found, pcre_exec() immediately returns
+       PCRE_ERROR_PARTIAL, without  considering  any  other  alternatives.  In
+       other  words, when PCRE_PARTIAL_HARD is set, a partial match is consid-
        ered to be more important than an alternative complete match.
 
-       In  both  cases,  the portion of the string that was inspected when the
+       In both cases, the portion of the string that was  inspected  when  the
        partial match was found is set as the first matching string. There is a
-       more  detailed  discussion  of partial and multi-segment matching, with
+       more detailed discussion of partial and  multi-segment  matching,  with
        examples, in the pcrepartial documentation.
 
    The string to be matched by pcre_exec()
 
-       The subject string is passed to pcre_exec() as a pointer in subject,  a
-       length  in  length, and a starting offset in startoffset. The units for
-       length and startoffset are bytes for the  8-bit  library,  16-bit  data
-       items  for  the  16-bit  library,  and 32-bit data items for the 32-bit
+       The  subject string is passed to pcre_exec() as a pointer in subject, a
+       length in length, and a starting offset in startoffset. The  units  for
+       length  and  startoffset  are  bytes for the 8-bit library, 16-bit data
+       items for the 16-bit library, and 32-bit  data  items  for  the  32-bit
        library.
 
-       If startoffset is negative or greater than the length of  the  subject,
-       pcre_exec()  returns  PCRE_ERROR_BADOFFSET. When the starting offset is
-       zero, the search for a match starts at the beginning  of  the  subject,
-       and  this  is by far the most common case. In UTF-8 or UTF-16 mode, the
-       offset must point to the start of a character, or the end of  the  sub-
-       ject  (in  UTF-32 mode, one data unit equals one character, so all off-
-       sets are valid). Unlike the pattern string,  the  subject  may  contain
+       If  startoffset  is negative or greater than the length of the subject,
+       pcre_exec() returns PCRE_ERROR_BADOFFSET. When the starting  offset  is
+       zero,  the  search  for a match starts at the beginning of the subject,
+       and this is by far the most common case. In UTF-8 or UTF-16  mode,  the
+       offset  must  point to the start of a character, or the end of the sub-
+       ject (in UTF-32 mode, one data unit equals one character, so  all  off-
+       sets  are  valid).  Unlike  the pattern string, the subject may contain
        binary zeroes.
 
-       A  non-zero  starting offset is useful when searching for another match
-       in the same subject by calling pcre_exec() again after a previous  suc-
-       cess.   Setting  startoffset differs from just passing over a shortened
-       string and setting PCRE_NOTBOL in the case of  a  pattern  that  begins
+       A non-zero starting offset is useful when searching for  another  match
+       in  the same subject by calling pcre_exec() again after a previous suc-
+       cess.  Setting startoffset differs from just passing over  a  shortened
+       string  and  setting  PCRE_NOTBOL  in the case of a pattern that begins
        with any kind of lookbehind. For example, consider the pattern
 
          \Biss\B
 
-       which  finds  occurrences  of "iss" in the middle of words. (\B matches
-       only if the current position in the subject is not  a  word  boundary.)
-       When  applied  to the string "Mississipi" the first call to pcre_exec()
-       finds the first occurrence. If pcre_exec() is called  again  with  just
-       the  remainder  of  the  subject,  namely  "issipi", it does not match,
+       which finds occurrences of "iss" in the middle of  words.  (\B  matches
+       only  if  the  current position in the subject is not a word boundary.)
+       When applied to the string "Mississippi" the first call to  pcre_exec()
+       finds  the  first  occurrence. If pcre_exec() is called again with just
+       the remainder of the subject, namely  "issippi",  it  does  not  match,
        because \B is always false at the start of the subject, which is deemed
-       to  be  a  word  boundary. However, if pcre_exec() is passed the entire
+       to be a word boundary. However, if pcre_exec()  is  passed  the  entire
        string again, but with startoffset set to 4, it finds the second occur-
-       rence  of "iss" because it is able to look behind the starting point to
+       rence of "iss" because it is able to look behind the starting point  to
        discover that it is preceded by a letter.
 
-       Finding all the matches in a subject is tricky  when  the  pattern  can
+       Finding  all  the  matches  in a subject is tricky when the pattern can
        match an empty string. It is possible to emulate Perl's /g behaviour by
-       first  trying  the  match  again  at  the   same   offset,   with   the
-       PCRE_NOTEMPTY_ATSTART  and  PCRE_ANCHORED  options,  and  then  if that
-       fails, advancing the starting  offset  and  trying  an  ordinary  match
+       first   trying   the   match   again  at  the  same  offset,  with  the
+       PCRE_NOTEMPTY_ATSTART and  PCRE_ANCHORED  options,  and  then  if  that
+       fails,  advancing  the  starting  offset  and  trying an ordinary match
        again. There is some code that demonstrates how to do this in the pcre-
        demo sample program. In the most general case, you have to check to see
-       if  the newline convention recognizes CRLF as a newline, and if so, and
+       if the newline convention recognizes CRLF as a newline, and if so,  and
        the current character is CR followed by LF, advance the starting offset
        by two characters instead of one.
 
-       If  a  non-zero starting offset is passed when the pattern is anchored,
+       If a non-zero starting offset is passed when the pattern  is  anchored,
        one attempt to match at the given offset is made. This can only succeed
-       if  the  pattern  does  not require the match to be at the start of the
+       if the pattern does not require the match to be at  the  start  of  the
        subject.
 
    How pcre_exec() returns captured substrings
 
-       In general, a pattern matches a certain portion of the subject, and  in
-       addition,  further  substrings  from  the  subject may be picked out by
-       parts of the pattern. Following the usage  in  Jeffrey  Friedl's  book,
-       this  is  called "capturing" in what follows, and the phrase "capturing
-       subpattern" is used for a fragment of a pattern that picks out  a  sub-
-       string.  PCRE  supports several other kinds of parenthesized subpattern
+       In  general, a pattern matches a certain portion of the subject, and in
+       addition, further substrings from the subject  may  be  picked  out  by
+       parts  of  the  pattern.  Following the usage in Jeffrey Friedl's book,
+       this is called "capturing" in what follows, and the  phrase  "capturing
+       subpattern"  is  used for a fragment of a pattern that picks out a sub-
+       string. PCRE supports several other kinds of  parenthesized  subpattern
        that do not cause substrings to be captured.
 
        Captured substrings are returned to the caller via a vector of integers
-       whose  address is passed in ovector. The number of elements in the vec-
-       tor is passed in ovecsize, which must be a non-negative  number.  Note:
+       whose address is passed in ovector. The number of elements in the  vec-
+       tor  is  passed in ovecsize, which must be a non-negative number. Note:
        this argument is NOT the size of ovector in bytes.
 
-       The  first  two-thirds of the vector is used to pass back captured sub-
-       strings, each substring using a pair of integers. The  remaining  third
-       of  the  vector is used as workspace by pcre_exec() while matching cap-
-       turing subpatterns, and is not available for passing back  information.
-       The  number passed in ovecsize should always be a multiple of three. If
+       The first two-thirds of the vector is used to pass back  captured  sub-
+       strings,  each  substring using a pair of integers. The remaining third
+       of the vector is used as workspace by pcre_exec() while  matching  cap-
+       turing  subpatterns, and is not available for passing back information.
+       The number passed in ovecsize should always be a multiple of three.  If
        it is not, it is rounded down.
 
-       When a match is successful, information about  captured  substrings  is
-       returned  in  pairs  of integers, starting at the beginning of ovector,
-       and continuing up to two-thirds of its length at the  most.  The  first
-       element  of  each pair is set to the offset of the first character in a
-       substring, and the second is set to the offset of the  first  character
-       after  the  end  of a substring. These values are always data unit off-
-       sets, even in UTF mode. They are byte offsets  in  the  8-bit  library,
-       16-bit  data  item  offsets in the 16-bit library, and 32-bit data item
+       When  a  match  is successful, information about captured substrings is
+       returned in pairs of integers, starting at the  beginning  of  ovector,
+       and  continuing  up  to two-thirds of its length at the most. The first
+       element of each pair is set to the offset of the first character  in  a
+       substring,  and  the second is set to the offset of the first character
+       after the end of a substring. These values are always  data  unit  off-
+       sets,  even  in  UTF  mode. They are byte offsets in the 8-bit library,
+       16-bit data item offsets in the 16-bit library, and  32-bit  data  item
        offsets in the 32-bit library. Note: they are not character counts.
 
-       The first pair of integers, ovector[0]  and  ovector[1],  identify  the
-       portion  of  the subject string matched by the entire pattern. The next
-       pair is used for the first capturing subpattern, and so on.  The  value
+       The  first  pair  of  integers, ovector[0] and ovector[1], identify the
+       portion of the subject string matched by the entire pattern.  The  next
+       pair  is  used for the first capturing subpattern, and so on. The value
        returned by pcre_exec() is one more than the highest numbered pair that
-       has been set.  For example, if two substrings have been  captured,  the
-       returned  value is 3. If there are no capturing subpatterns, the return
+       has  been  set.  For example, if two substrings have been captured, the
+       returned value is 3. If there are no capturing subpatterns, the  return
        value from a successful match is 1, indicating that just the first pair
        of offsets has been set.
 
        If a capturing subpattern is matched repeatedly, it is the last portion
        of the string that it matched that is returned.
 
-       If the vector is too small to hold all the captured substring  offsets,
+       If  the vector is too small to hold all the captured substring offsets,
        it is used as far as possible (up to two-thirds of its length), and the
-       function returns a value of zero. If neither the actual string  matched
-       nor  any captured substrings are of interest, pcre_exec() may be called
-       with ovector passed as NULL and ovecsize as zero. However, if the  pat-
-       tern  contains  back  references  and  the ovector is not big enough to
-       remember the related substrings, PCRE has to get additional memory  for
-       use  during matching. Thus it is usually advisable to supply an ovector
+       function  returns a value of zero. If neither the actual string matched
+       nor any captured substrings are of interest, pcre_exec() may be  called
+       with  ovector passed as NULL and ovecsize as zero. However, if the pat-
+       tern contains back references and the ovector  is  not  big  enough  to
+       remember  the related substrings, PCRE has to get additional memory for
+       use during matching. Thus it is usually advisable to supply an  ovector
        of reasonable size.
 
-       There are some cases where zero is returned  (indicating  vector  over-
-       flow)  when  in fact the vector is exactly the right size for the final
+       There  are  some  cases where zero is returned (indicating vector over-
+       flow) when in fact the vector is exactly the right size for  the  final
        match. For example, consider the pattern
 
          (a)(?:(b)c|bd)
 
-       If a vector of 6 elements (allowing for only 1 captured  substring)  is
+       If  a  vector of 6 elements (allowing for only 1 captured substring) is
        given with subject string "abd", pcre_exec() will try to set the second
        captured string, thereby recording a vector overflow, before failing to
-       match  "c"  and  backing  up  to  try  the second alternative. The zero
-       return, however, does correctly indicate that  the  maximum  number  of
+       match "c" and backing up  to  try  the  second  alternative.  The  zero
+       return,  however,  does  correctly  indicate that the maximum number of
        slots (namely 2) have been filled. In similar cases where there is tem-
-       porary overflow, but the final number of used slots  is  actually  less
+       porary  overflow,  but  the final number of used slots is actually less
        than the maximum, a non-zero value is returned.
 
        The pcre_fullinfo() function can be used to find out how many capturing
-       subpatterns there are in a compiled  pattern.  The  smallest  size  for
-       ovector  that  will allow for n captured substrings, in addition to the
+       subpatterns  there  are  in  a  compiled pattern. The smallest size for
+       ovector that will allow for n captured substrings, in addition  to  the
        offsets of the substring matched by the whole pattern, is (n+1)*3.
 
-       It is possible for capturing subpattern number n+1 to match  some  part
+       It  is  possible for capturing subpattern number n+1 to match some part
        of the subject when subpattern n has not been used at all. For example,
-       if the string "abc" is matched  against  the  pattern  (a|(z))(bc)  the
+       if  the  string  "abc"  is  matched against the pattern (a|(z))(bc) the
        return from the function is 4, and subpatterns 1 and 3 are matched, but
-       2 is not. When this happens, both values in  the  offset  pairs  corre-
+       2  is  not.  When  this happens, both values in the offset pairs corre-
        sponding to unused subpatterns are set to -1.
 
-       Offset  values  that correspond to unused subpatterns at the end of the
-       expression are also set to -1. For example,  if  the  string  "abc"  is
-       matched  against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not
-       matched. The return from the function is 2, because  the  highest  used
-       capturing  subpattern  number  is 1, and the offsets for for the second
-       and third capturing subpatterns (assuming the vector is  large  enough,
+       Offset values that correspond to unused subpatterns at the end  of  the
+       expression  are  also  set  to  -1. For example, if the string "abc" is
+       matched against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are  not
+       matched.  The  return  from the function is 2, because the highest used
+       capturing  subpattern  number  is  1,  and the offsets for  the  second
+       and  third  capturing subpatterns (assuming the vector is large enough,
        of course) are set to -1.
 
-       Note:  Elements  in  the first two-thirds of ovector that do not corre-
-       spond to capturing parentheses in the pattern are never  changed.  That
-       is,  if  a pattern contains n capturing parentheses, no more than ovec-
-       tor[0] to ovector[2n+1] are set by pcre_exec(). The other elements  (in
+       Note: Elements in the first two-thirds of ovector that  do  not  corre-
+       spond  to  capturing parentheses in the pattern are never changed. That
+       is, if a pattern contains n capturing parentheses, no more  than  ovec-
+       tor[0]  to ovector[2n+1] are set by pcre_exec(). The other elements (in
        the first two-thirds) retain whatever values they previously had.
 
-       Some  convenience  functions  are  provided for extracting the captured
+       Some convenience functions are provided  for  extracting  the  captured
        substrings as separate strings. These are described below.
 
    Error return values from pcre_exec()
 
-       If pcre_exec() fails, it returns a negative number. The  following  are
+       If  pcre_exec()  fails, it returns a negative number. The following are
        defined in the header file:
 
          PCRE_ERROR_NOMATCH        (-1)
@@ -3585,7 +3602,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
 
          PCRE_ERROR_NULL           (-2)
 
-       Either  code  or  subject  was  passed as NULL, or ovector was NULL and
+       Either code or subject was passed as NULL,  or  ovector  was  NULL  and
        ovecsize was not zero.
 
          PCRE_ERROR_BADOPTION      (-3)
@@ -3594,82 +3611,82 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
 
          PCRE_ERROR_BADMAGIC       (-4)
 
-       PCRE stores a 4-byte "magic number" at the start of the compiled  code,
+       PCRE  stores a 4-byte "magic number" at the start of the compiled code,
        to catch the case when it is passed a junk pointer and to detect when a
        pattern that was compiled in an environment of one endianness is run in
-       an  environment  with the other endianness. This is the error that PCRE
+       an environment with the other endianness. This is the error  that  PCRE
        gives when the magic number is not present.
 
          PCRE_ERROR_UNKNOWN_OPCODE (-5)
 
        While running the pattern match, an unknown item was encountered in the
-       compiled  pattern.  This  error  could be caused by a bug in PCRE or by
+       compiled pattern. This error could be caused by a bug  in  PCRE  or  by
        overwriting of the compiled pattern.
 
          PCRE_ERROR_NOMEMORY       (-6)
 
-       If a pattern contains back references, but the ovector that  is  passed
+       If  a  pattern contains back references, but the ovector that is passed
        to pcre_exec() is not big enough to remember the referenced substrings,
-       PCRE gets a block of memory at the start of matching to  use  for  this
-       purpose.  If the call via pcre_malloc() fails, this error is given. The
+       PCRE  gets  a  block of memory at the start of matching to use for this
+       purpose. If the call via pcre_malloc() fails, this error is given.  The
        memory is automatically freed at the end of matching.
 
-       This error is also given if pcre_stack_malloc() fails  in  pcre_exec().
-       This  can happen only when PCRE has been compiled with --disable-stack-
+       This  error  is also given if pcre_stack_malloc() fails in pcre_exec().
+       This can happen only when PCRE has been compiled with  --disable-stack-
        for-recursion.
 
          PCRE_ERROR_NOSUBSTRING    (-7)
 
-       This error is used by the pcre_copy_substring(),  pcre_get_substring(),
+       This  error is used by the pcre_copy_substring(), pcre_get_substring(),
        and  pcre_get_substring_list()  functions  (see  below).  It  is  never
        returned by pcre_exec().
 
          PCRE_ERROR_MATCHLIMIT     (-8)
 
-       The backtracking limit, as specified by  the  match_limit  field  in  a
-       pcre_extra  structure  (or  defaulted) was reached. See the description
+       The  backtracking  limit,  as  specified  by the match_limit field in a
+       pcre_extra structure (or defaulted) was reached.  See  the  description
        above.
 
          PCRE_ERROR_CALLOUT        (-9)
 
        This error is never generated by pcre_exec() itself. It is provided for
-       use  by  callout functions that want to yield a distinctive error code.
+       use by callout functions that want to yield a distinctive  error  code.
        See the pcrecallout documentation for details.
 
          PCRE_ERROR_BADUTF8        (-10)
 
-       A string that contains an invalid UTF-8 byte sequence was passed  as  a
-       subject,  and the PCRE_NO_UTF8_CHECK option was not set. If the size of
-       the output vector (ovecsize) is at least 2,  the  byte  offset  to  the
-       start  of  the  the invalid UTF-8 character is placed in the first ele-
-       ment, and a reason code is placed in the  second  element.  The  reason
+       A  string  that contains an invalid UTF-8 byte sequence was passed as a
+       subject, and the PCRE_NO_UTF8_CHECK option was not set. If the size  of
+       the  output  vector  (ovecsize)  is  at least 2, the byte offset to the
+       start  of  the  invalid  UTF-8 character is placed in  the  first  ele-
+       ment,  and  a  reason  code is placed in the second element. The reason
        codes are listed in the following section.  For backward compatibility,
-       if PCRE_PARTIAL_HARD is set and the problem is a truncated UTF-8  char-
-       acter   at   the   end   of   the   subject  (reason  codes  1  to  5),
+       if  PCRE_PARTIAL_HARD is set and the problem is a truncated UTF-8 char-
+       acter  at  the  end  of  the   subject   (reason   codes   1   to   5),
        PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8.
 
          PCRE_ERROR_BADUTF8_OFFSET (-11)
 
-       The UTF-8 byte sequence that was passed as a subject  was  checked  and
-       found  to be valid (the PCRE_NO_UTF8_CHECK option was not set), but the
-       value of startoffset did not point to the beginning of a UTF-8  charac-
+       The  UTF-8  byte  sequence that was passed as a subject was checked and
+       found to be valid (the PCRE_NO_UTF8_CHECK option was not set), but  the
+       value  of startoffset did not point to the beginning of a UTF-8 charac-
        ter or the end of the subject.
 
          PCRE_ERROR_PARTIAL        (-12)
 
-       The  subject  string did not match, but it did match partially. See the
+       The subject string did not match, but it did match partially.  See  the
        pcrepartial documentation for details of partial matching.
 
          PCRE_ERROR_BADPARTIAL     (-13)
 
-       This code is no longer in  use.  It  was  formerly  returned  when  the
-       PCRE_PARTIAL  option  was used with a compiled pattern containing items
-       that were  not  supported  for  partial  matching.  From  release  8.00
+       This  code  is  no  longer  in  use.  It was formerly returned when the
+       PCRE_PARTIAL option was used with a compiled pattern  containing  items
+       that  were  not  supported  for  partial  matching.  From  release 8.00
        onwards, there are no restrictions on partial matching.
 
          PCRE_ERROR_INTERNAL       (-14)
 
-       An  unexpected  internal error has occurred. This error could be caused
+       An unexpected internal error has occurred. This error could  be  caused
        by a bug in PCRE or by overwriting of the compiled pattern.
 
          PCRE_ERROR_BADCOUNT       (-15)
@@ -3679,7 +3696,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
          PCRE_ERROR_RECURSIONLIMIT (-21)
 
        The internal recursion limit, as specified by the match_limit_recursion
-       field  in  a  pcre_extra  structure (or defaulted) was reached. See the
+       field in a pcre_extra structure (or defaulted)  was  reached.  See  the
        description above.
 
          PCRE_ERROR_BADNEWLINE     (-23)
@@ -3693,29 +3710,29 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
 
          PCRE_ERROR_SHORTUTF8      (-25)
 
-       This  error  is returned instead of PCRE_ERROR_BADUTF8 when the subject
-       string ends with a truncated UTF-8 character and the  PCRE_PARTIAL_HARD
-       option  is  set.   Information  about  the  failure  is returned as for
-       PCRE_ERROR_BADUTF8. It is in fact sufficient to detect this  case,  but
-       this  special error code for PCRE_PARTIAL_HARD precedes the implementa-
-       tion of returned information; it is retained for backwards  compatibil-
+       This error is returned instead of PCRE_ERROR_BADUTF8 when  the  subject
+       string  ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD
+       option is set.  Information  about  the  failure  is  returned  as  for
+       PCRE_ERROR_BADUTF8.  It  is in fact sufficient to detect this case, but
+       this special error code for PCRE_PARTIAL_HARD precedes the  implementa-
+       tion  of returned information; it is retained for backwards compatibil-
        ity.
 
          PCRE_ERROR_RECURSELOOP    (-26)
 
        This error is returned when pcre_exec() detects a recursion loop within
-       the pattern. Specifically, it means that either the whole pattern or  a
-       subpattern  has been called recursively for the second time at the same
+       the  pattern. Specifically, it means that either the whole pattern or a
+       subpattern has been called recursively for the second time at the  same
        position in the subject string. Some simple patterns that might do this
-       are  detected  and faulted at compile time, but more complicated cases,
+       are detected and faulted at compile time, but more  complicated  cases,
        in particular mutual recursions between two different subpatterns, can-
        not be detected until run time.
 
          PCRE_ERROR_JIT_STACKLIMIT (-27)
 
-       This  error  is  returned  when a pattern that was successfully studied
-       using a JIT compile option is being matched, but the  memory  available
-       for  the  just-in-time  processing  stack  is not large enough. See the
+       This error is returned when a pattern  that  was  successfully  studied
+       using  a  JIT compile option is being matched, but the memory available
+       for the just-in-time processing stack is  not  large  enough.  See  the
        pcrejit documentation for more details.
 
          PCRE_ERROR_BADMODE        (-28)
@@ -3725,38 +3742,38 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
 
          PCRE_ERROR_BADENDIANNESS  (-29)
 
-       This  error  is  given  if  a  pattern  that  was compiled and saved is
-       reloaded on a host with  different  endianness.  The  utility  function
+       This error is given if  a  pattern  that  was  compiled  and  saved  is
+       reloaded  on  a  host  with  different endianness. The utility function
        pcre_pattern_to_host_byte_order() can be used to convert such a pattern
        so that it runs on the new host.
 
          PCRE_ERROR_JIT_BADOPTION
 
-       This error is returned when a pattern  that  was  successfully  studied
-       using  a  JIT  compile  option  is being matched, but the matching mode
-       (partial or complete match) does not correspond to any JIT  compilation
-       mode.  When  the JIT fast path function is used, this error may be also
-       given for invalid options.  See  the  pcrejit  documentation  for  more
+       This  error  is  returned  when a pattern that was successfully studied
+       using a JIT compile option is being  matched,  but  the  matching  mode
+       (partial  or complete match) does not correspond to any JIT compilation
+       mode. When the JIT fast path function is used, this error may  be  also
+       given  for  invalid  options.  See  the  pcrejit documentation for more
        details.
 
          PCRE_ERROR_BADLENGTH      (-32)
 
-       This  error is given if pcre_exec() is called with a negative value for
+       This error is given if pcre_exec() is called with a negative value  for
        the length argument.
 
        Error numbers -16 to -20, -22, and -30 are not used by pcre_exec().
 
    Reason codes for invalid UTF-8 strings
 
-       This section applies only  to  the  8-bit  library.  The  corresponding
-       information  for the 16-bit and 32-bit libraries is given in the pcre16
+       This  section  applies  only  to  the  8-bit library. The corresponding
+       information for the 16-bit and 32-bit libraries is given in the  pcre16
        and pcre32 pages.
 
        When pcre_exec() returns either PCRE_ERROR_BADUTF8 or PCRE_ERROR_SHORT-
-       UTF8,  and  the size of the output vector (ovecsize) is at least 2, the
-       offset of the start of the invalid UTF-8 character  is  placed  in  the
+       UTF8, and the size of the output vector (ovecsize) is at least  2,  the
+       offset  of  the  start  of the invalid UTF-8 character is placed in the
        first output vector element (ovector[0]) and a reason code is placed in
-       the second element (ovector[1]). The reason codes are  given  names  in
+       the  second  element  (ovector[1]). The reason codes are given names in
        the pcre.h header file:
 
          PCRE_UTF8_ERR1
@@ -3765,10 +3782,10 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
          PCRE_UTF8_ERR4
          PCRE_UTF8_ERR5
 
-       The  string  ends  with a truncated UTF-8 character; the code specifies
-       how many bytes are missing (1 to 5). Although RFC 3629 restricts  UTF-8
-       characters  to  be  no longer than 4 bytes, the encoding scheme (origi-
-       nally defined by RFC 2279) allows for  up  to  6  bytes,  and  this  is
+       The string ends with a truncated UTF-8 character;  the  code  specifies
+       how  many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8
+       characters to be no longer than 4 bytes, the  encoding  scheme  (origi-
+       nally  defined  by  RFC  2279)  allows  for  up to 6 bytes, and this is
        checked first; hence the possibility of 4 or 5 missing bytes.
 
          PCRE_UTF8_ERR6
@@ -3778,24 +3795,24 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
          PCRE_UTF8_ERR10
 
        The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of
-       the character do not have the binary value 0b10 (that  is,  either  the
+       the  character  do  not have the binary value 0b10 (that is, either the
        most significant bit is 0, or the next bit is 1).
 
          PCRE_UTF8_ERR11
          PCRE_UTF8_ERR12
 
-       A  character that is valid by the RFC 2279 rules is either 5 or 6 bytes
+       A character that is valid by the RFC 2279 rules is either 5 or 6  bytes
        long; these code points are excluded by RFC 3629.
 
          PCRE_UTF8_ERR13
 
-       A 4-byte character has a value greater than 0x10fff; these code  points
+       A 4-byte character has a value greater than 0x10ffff; these code points
        are excluded by RFC 3629.
 
          PCRE_UTF8_ERR14
 
-       A  3-byte  character  has  a  value in the range 0xd800 to 0xdfff; this
-       range of code points are reserved by RFC 3629 for use with UTF-16,  and
+       A 3-byte character has a value in the  range  0xd800  to  0xdfff;  this
+       range  of code points are reserved by RFC 3629 for use with UTF-16, and
        so are excluded from UTF-8.
 
          PCRE_UTF8_ERR15
@@ -3804,28 +3821,28 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
          PCRE_UTF8_ERR18
          PCRE_UTF8_ERR19
 
-       A  2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes
-       for a value that can be represented by fewer bytes, which  is  invalid.
-       For  example,  the two bytes 0xc0, 0xae give the value 0x2e, whose cor-
+       A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it  codes
+       for  a  value that can be represented by fewer bytes, which is invalid.
+       For example, the two bytes 0xc0, 0xae give the value 0x2e,  whose  cor-
        rect coding uses just one byte.
 
          PCRE_UTF8_ERR20
 
        The two most significant bits of the first byte of a character have the
-       binary  value 0b10 (that is, the most significant bit is 1 and the sec-
-       ond is 0). Such a byte can only validly occur as the second  or  subse-
+       binary value 0b10 (that is, the most significant bit is 1 and the  sec-
+       ond  is  0). Such a byte can only validly occur as the second or subse-
        quent byte of a multi-byte character.
 
          PCRE_UTF8_ERR21
 
-       The  first byte of a character has the value 0xfe or 0xff. These values
+       The first byte of a character has the value 0xfe or 0xff. These  values
        can never occur in a valid UTF-8 string.
 
          PCRE_UTF8_ERR22
 
-       This error code was formerly used when  the  presence  of  a  so-called
-       "non-character"  caused an error. Unicode corrigendum #9 makes it clear
-       that such characters should not cause a string to be rejected,  and  so
+       This  error  code  was  formerly  used when the presence of a so-called
+       "non-character" caused an error. Unicode corrigendum #9 makes it  clear
+       that  such  characters should not cause a string to be rejected, and so
        this code is no longer in use and is never returned.
 
 
@@ -3842,78 +3859,78 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
        int pcre_get_substring_list(const char *subject,
             int *ovector, int stringcount, const char ***listptr);
 
-       Captured  substrings  can  be  accessed  directly  by using the offsets
-       returned by pcre_exec() in  ovector.  For  convenience,  the  functions
+       Captured substrings can be  accessed  directly  by  using  the  offsets
+       returned  by  pcre_exec()  in  ovector.  For convenience, the functions
        pcre_copy_substring(),    pcre_get_substring(),    and    pcre_get_sub-
-       string_list() are provided for extracting captured substrings  as  new,
-       separate,  zero-terminated strings. These functions identify substrings
-       by number. The next section describes functions  for  extracting  named
+       string_list()  are  provided for extracting captured substrings as new,
+       separate, zero-terminated strings. These functions identify  substrings
+       by  number.  The  next section describes functions for extracting named
        substrings.
 
-       A  substring that contains a binary zero is correctly extracted and has
-       a further zero added on the end, but the result is not, of course, a  C
-       string.   However,  you  can  process such a string by referring to the
-       length that is  returned  by  pcre_copy_substring()  and  pcre_get_sub-
+       A substring that contains a binary zero is correctly extracted and  has
+       a  further zero added on the end, but the result is not, of course, a C
+       string.  However, you can process such a string  by  referring  to  the
+       length  that  is  returned  by  pcre_copy_substring() and pcre_get_sub-
        string().  Unfortunately, the interface to pcre_get_substring_list() is
-       not adequate for handling strings containing binary zeros, because  the
+       not  adequate for handling strings containing binary zeros, because the
        end of the final string is not independently indicated.
 
-       The  first  three  arguments  are the same for all three of these func-
-       tions: subject is the subject string that has  just  been  successfully
+       The first three arguments are the same for all  three  of  these  func-
+       tions:  subject  is  the subject string that has just been successfully
        matched, ovector is a pointer to the vector of integer offsets that was
        passed to pcre_exec(), and stringcount is the number of substrings that
-       were  captured  by  the match, including the substring that matched the
+       were captured by the match, including the substring  that  matched  the
        entire regular expression. This is the value returned by pcre_exec() if
-       it  is greater than zero. If pcre_exec() returned zero, indicating that
-       it ran out of space in ovector, the value passed as stringcount  should
+       it is greater than zero. If pcre_exec() returned zero, indicating  that
+       it  ran out of space in ovector, the value passed as stringcount should
        be the number of elements in the vector divided by three.
 
-       The  functions pcre_copy_substring() and pcre_get_substring() extract a
-       single substring, whose number is given as  stringnumber.  A  value  of
-       zero  extracts  the  substring that matched the entire pattern, whereas
-       higher values  extract  the  captured  substrings.  For  pcre_copy_sub-
-       string(),  the  string  is  placed  in buffer, whose length is given by
-       buffersize, while for pcre_get_substring() a new  block  of  memory  is
-       obtained  via  pcre_malloc,  and its address is returned via stringptr.
-       The yield of the function is the length of the  string,  not  including
+       The functions pcre_copy_substring() and pcre_get_substring() extract  a
+       single  substring,  whose  number  is given as stringnumber. A value of
+       zero extracts the substring that matched the  entire  pattern,  whereas
+       higher  values  extract  the  captured  substrings.  For pcre_copy_sub-
+       string(), the string is placed in buffer,  whose  length  is  given  by
+       buffersize,  while  for  pcre_get_substring()  a new block of memory is
+       obtained via pcre_malloc, and its address is  returned  via  stringptr.
+       The  yield  of  the function is the length of the string, not including
        the terminating zero, or one of these error codes:
 
          PCRE_ERROR_NOMEMORY       (-6)
 
-       The  buffer  was too small for pcre_copy_substring(), or the attempt to
+       The buffer was too small for pcre_copy_substring(), or the  attempt  to
        get memory failed for pcre_get_substring().
 
          PCRE_ERROR_NOSUBSTRING    (-7)
 
        There is no substring whose number is stringnumber.
 
-       The pcre_get_substring_list()  function  extracts  all  available  sub-
-       strings  and  builds  a list of pointers to them. All this is done in a
+       The  pcre_get_substring_list()  function  extracts  all  available sub-
+       strings and builds a list of pointers to them. All this is  done  in  a
        single block of memory that is obtained via pcre_malloc. The address of
-       the  memory  block  is returned via listptr, which is also the start of
-       the list of string pointers. The end of the list is marked  by  a  NULL
-       pointer.  The  yield  of  the function is zero if all went well, or the
+       the memory block is returned via listptr, which is also  the  start  of
+       the  list  of  string pointers. The end of the list is marked by a NULL
+       pointer. The yield of the function is zero if all  went  well,  or  the
        error code
 
          PCRE_ERROR_NOMEMORY       (-6)
 
        if the attempt to get the memory block failed.
 
-       When any of these functions encounter a substring that is unset,  which
-       can  happen  when  capturing subpattern number n+1 matches some part of
-       the subject, but subpattern n has not been used at all, they return  an
+       When  any of these functions encounter a substring that is unset, which
+       can happen when capturing subpattern number n+1 matches  some  part  of
+       the  subject, but subpattern n has not been used at all, they return an
        empty string. This can be distinguished from a genuine zero-length sub-
-       string by inspecting the appropriate offset in ovector, which is  nega-
+       string  by inspecting the appropriate offset in ovector, which is nega-
        tive for unset substrings.
 
-       The  two convenience functions pcre_free_substring() and pcre_free_sub-
-       string_list() can be used to free the memory  returned  by  a  previous
+       The two convenience functions pcre_free_substring() and  pcre_free_sub-
+       string_list()  can  be  used  to free the memory returned by a previous
        call  of  pcre_get_substring()  or  pcre_get_substring_list(),  respec-
-       tively. They do nothing more than  call  the  function  pointed  to  by
-       pcre_free,  which  of course could be called directly from a C program.
-       However, PCRE is used in some situations where it is linked via a  spe-
-       cial   interface  to  another  programming  language  that  cannot  use
-       pcre_free directly; it is for these cases that the functions  are  pro-
+       tively.  They  do  nothing  more  than  call the function pointed to by
+       pcre_free, which of course could be called directly from a  C  program.
+       However,  PCRE is used in some situations where it is linked via a spe-
+       cial  interface  to  another  programming  language  that  cannot   use
+       pcre_free  directly;  it is for these cases that the functions are pro-
        vided.
 
 
@@ -3932,7 +3949,7 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
             int stringcount, const char *stringname,
             const char **stringptr);
 
-       To  extract a substring by name, you first have to find associated num-
+       To extract a substring by name, you first have to find the associated num-
        ber.  For example, for this pattern
 
          (a+)b(?<xxx>\d+)...
@@ -3941,35 +3958,35 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
        be unique (PCRE_DUPNAMES was not set), you can find the number from the
        name by calling pcre_get_stringnumber(). The first argument is the com-
        piled pattern, and the second is the name. The yield of the function is
-       the subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if  there  is  no
+       the  subpattern  number,  or PCRE_ERROR_NOSUBSTRING (-7) if there is no
        subpattern of that name.
 
        Given the number, you can extract the substring directly, or use one of
        the functions described in the previous section. For convenience, there
        are also two functions that do the whole job.
 
-       Most    of    the    arguments   of   pcre_copy_named_substring()   and
-       pcre_get_named_substring() are the same  as  those  for  the  similarly
-       named  functions  that extract by number. As these are described in the
-       previous section, they are not re-described here. There  are  just  two
+       Most   of   the   arguments    of    pcre_copy_named_substring()    and
+       pcre_get_named_substring()  are  the  same  as  those for the similarly
+       named functions that extract by number. As these are described  in  the
+       previous  section,  they  are not re-described here. There are just two
        differences:
 
-       First,  instead  of a substring number, a substring name is given. Sec-
+       First, instead of a substring number, a substring name is  given.  Sec-
        ond, there is an extra argument, given at the start, which is a pointer
-       to  the compiled pattern. This is needed in order to gain access to the
+       to the compiled pattern. This is needed in order to gain access to  the
        name-to-number translation table.
 
-       These functions call pcre_get_stringnumber(), and if it succeeds,  they
-       then  call  pcre_copy_substring() or pcre_get_substring(), as appropri-
-       ate. NOTE: If PCRE_DUPNAMES is set and there are duplicate  names,  the
+       These  functions call pcre_get_stringnumber(), and if it succeeds, they
+       then call pcre_copy_substring() or pcre_get_substring(),  as  appropri-
+       ate.  NOTE:  If PCRE_DUPNAMES is set and there are duplicate names, the
        behaviour may not be what you want (see the next section).
 
        Warning: If the pattern uses the (?| feature to set up multiple subpat-
-       terns with the same number, as described in the  section  on  duplicate
-       subpattern  numbers  in  the  pcrepattern page, you cannot use names to
-       distinguish the different subpatterns, because names are  not  included
-       in  the compiled code. The matching process uses only numbers. For this
-       reason, the use of different names for subpatterns of the  same  number
+       terns  with  the  same number, as described in the section on duplicate
+       subpattern numbers in the pcrepattern page, you  cannot  use  names  to
+       distinguish  the  different subpatterns, because names are not included
+       in the compiled code. The matching process uses only numbers. For  this
+       reason,  the  use of different names for subpatterns of the same number
        causes an error at compile time.
 
 
@@ -3978,76 +3995,76 @@ DUPLICATE SUBPATTERN NAMES
        int pcre_get_stringtable_entries(const pcre *code,
             const char *name, char **first, char **last);
 
-       When  a  pattern  is  compiled with the PCRE_DUPNAMES option, names for
-       subpatterns are not required to be unique. (Duplicate names are  always
-       allowed  for subpatterns with the same number, created by using the (?|
-       feature. Indeed, if such subpatterns are named, they  are  required  to
+       When a pattern is compiled with the  PCRE_DUPNAMES  option,  names  for
+       subpatterns  are not required to be unique. (Duplicate names are always
+       allowed for subpatterns with the same number, created by using the  (?|
+       feature.  Indeed,  if  such subpatterns are named, they are required to
        use the same names.)
 
        Normally, patterns with duplicate names are such that in any one match,
-       only one of the named subpatterns participates. An example is shown  in
+       only  one of the named subpatterns participates. An example is shown in
        the pcrepattern documentation.
 
-       When    duplicates   are   present,   pcre_copy_named_substring()   and
-       pcre_get_named_substring() return the first substring corresponding  to
-       the  given  name  that  is set. If none are set, PCRE_ERROR_NOSUBSTRING
-       (-7) is returned; no  data  is  returned.  The  pcre_get_stringnumber()
-       function  returns one of the numbers that are associated with the name,
+       When   duplicates   are   present,   pcre_copy_named_substring()    and
+       pcre_get_named_substring()  return the first substring corresponding to
+       the given name that is set. If  none  are  set,  PCRE_ERROR_NOSUBSTRING
+       (-7)  is  returned;  no  data  is returned. The pcre_get_stringnumber()
+       function returns one of the numbers that are associated with the  name,
        but it is not defined which it is.
 
-       If you want to get full details of all captured substrings for a  given
-       name,  you  must  use  the pcre_get_stringtable_entries() function. The
+       If  you want to get full details of all captured substrings for a given
+       name, you must use  the  pcre_get_stringtable_entries()  function.  The
        first argument is the compiled pattern, and the second is the name. The
-       third  and  fourth  are  pointers to variables which are updated by the
+       third and fourth are pointers to variables which  are  updated  by  the
        function. After it has run, they point to the first and last entries in
-       the  name-to-number  table  for  the  given  name.  The function itself
-       returns the length of each entry,  or  PCRE_ERROR_NOSUBSTRING  (-7)  if
-       there  are none. The format of the table is described above in the sec-
-       tion entitled Information about a pattern above.  Given all  the  rele-
-       vant  entries  for the name, you can extract each of their numbers, and
+       the name-to-number table  for  the  given  name.  The  function  itself
+       returns  the  length  of  each entry, or PCRE_ERROR_NOSUBSTRING (-7) if
+       there are none. The format of the table is described above in the  sec-
+       tion entitled "Information about a pattern".  Given  all  the  rele-
+       vant entries for the name, you can extract each of their  numbers,  and
        hence the captured data, if any.
 
 
 FINDING ALL POSSIBLE MATCHES
 
-       The traditional matching function uses a  similar  algorithm  to  Perl,
+       The  traditional  matching  function  uses a similar algorithm to Perl,
        which stops when it finds the first match, starting at a given point in
-       the subject. If you want to find all possible matches, or  the  longest
-       possible  match,  consider using the alternative matching function (see
-       below) instead. If you cannot use the alternative function,  but  still
-       need  to  find all possible matches, you can kludge it up by making use
+       the  subject.  If you want to find all possible matches, or the longest
+       possible match, consider using the alternative matching  function  (see
+       below)  instead.  If you cannot use the alternative function, but still
+       need to find all possible matches, you can kludge it up by  making  use
        of the callout facility, which is described in the pcrecallout documen-
        tation.
 
        What you have to do is to insert a callout right at the end of the pat-
-       tern.  When your callout function is called, extract and save the  cur-
-       rent  matched  substring.  Then  return  1, which forces pcre_exec() to
-       backtrack and try other alternatives. Ultimately, when it runs  out  of
+       tern.   When your callout function is called, extract and save the cur-
+       rent matched substring. Then return  1,  which  forces  pcre_exec()  to
+       backtrack  and  try other alternatives. Ultimately, when it runs out of
        matches, pcre_exec() will yield PCRE_ERROR_NOMATCH.
 
 
 OBTAINING AN ESTIMATE OF STACK USAGE
 
-       Matching  certain  patterns  using pcre_exec() can use a lot of process
-       stack, which in certain environments can be  rather  limited  in  size.
-       Some  users  find it helpful to have an estimate of the amount of stack
-       that is used by pcre_exec(), to help  them  set  recursion  limits,  as
-       described  in  the pcrestack documentation. The estimate that is output
+       Matching certain patterns using pcre_exec() can use a  lot  of  process
+       stack,  which  in  certain  environments can be rather limited in size.
+       Some users find it helpful to have an estimate of the amount  of  stack
+       that  is  used  by  pcre_exec(),  to help them set recursion limits, as
+       described in the pcrestack documentation. The estimate that  is  output
        by pcretest when called with the -m and -C options is obtained by call-
-       ing  pcre_exec with the values NULL, NULL, NULL, -999, and -999 for its
+       ing pcre_exec with the values NULL, NULL, NULL, -999, and -999 for  its
        first five arguments.
 
-       Normally, if  its  first  argument  is  NULL,  pcre_exec()  immediately
-       returns  the negative error code PCRE_ERROR_NULL, but with this special
-       combination of arguments, it returns instead a  negative  number  whose
-       absolute  value  is the approximate stack frame size in bytes. (A nega-
-       tive number is used so that it is clear that no  match  has  happened.)
-       The  value  is  approximate  because  in some cases, recursive calls to
+       Normally,  if  its  first  argument  is  NULL,  pcre_exec() immediately
+       returns the negative error code PCRE_ERROR_NULL, but with this  special
+       combination  of  arguments,  it returns instead a negative number whose
+       absolute value is the approximate stack frame size in bytes.  (A  nega-
+       tive  number  is  used so that it is clear that no match has happened.)
+       The value is approximate because in  some  cases,  recursive  calls  to
        pcre_exec() occur when there are one or two additional variables on the
        stack.
 
-       If  PCRE  has  been  compiled  to use the heap instead of the stack for
-       recursion, the value returned  is  the  size  of  each  block  that  is
+       If PCRE has been compiled to use the heap  instead  of  the  stack  for
+       recursion,  the  value  returned  is  the  size  of  each block that is
        obtained from the heap.
 
 
@@ -4058,26 +4075,26 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
             int options, int *ovector, int ovecsize,
             int *workspace, int wscount);
 
-       The  function  pcre_dfa_exec()  is  called  to  match  a subject string
-       against a compiled pattern, using a matching algorithm that  scans  the
-       subject  string  just  once, and does not backtrack. This has different
-       characteristics to the normal algorithm, and  is  not  compatible  with
-       Perl.  Some  of the features of PCRE patterns are not supported. Never-
-       theless, there are times when this kind of matching can be useful.  For
-       a  discussion  of  the  two matching algorithms, and a list of features
-       that pcre_dfa_exec() does not support, see the pcrematching  documenta-
+       The function pcre_dfa_exec()  is  called  to  match  a  subject  string
+       against  a  compiled pattern, using a matching algorithm that scans the
+       subject string just once, and does not backtrack.  This  has  different
+       characteristics  to  the  normal  algorithm, and is not compatible with
+       Perl. Some of the features of PCRE patterns are not  supported.  Never-
+       theless,  there are times when this kind of matching can be useful. For
+       a discussion of the two matching algorithms, and  a  list  of  features
+       that  pcre_dfa_exec() does not support, see the pcrematching documenta-
        tion.
 
-       The  arguments  for  the  pcre_dfa_exec()  function are the same as for
+       The arguments for the pcre_dfa_exec() function  are  the  same  as  for
        pcre_exec(), plus two extras. The ovector argument is used in a differ-
-       ent  way,  and  this is described below. The other common arguments are
-       used in the same way as for pcre_exec(), so their  description  is  not
+       ent way, and this is described below. The other  common  arguments  are
+       used  in  the  same way as for pcre_exec(), so their description is not
        repeated here.
 
-       The  two  additional  arguments provide workspace for the function. The
-       workspace vector should contain at least 20 elements. It  is  used  for
+       The two additional arguments provide workspace for  the  function.  The
+       workspace  vector  should  contain at least 20 elements. It is used for
        keeping  track  of  multiple  paths  through  the  pattern  tree.  More
-       workspace will be needed for patterns and subjects where  there  are  a
+       workspace  will  be  needed for patterns and subjects where there are a
        lot of potential matches.
 
        Here is an example of a simple call to pcre_dfa_exec():
@@ -4099,55 +4116,55 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
 
    Option bits for pcre_dfa_exec()
 
-       The  unused  bits  of  the options argument for pcre_dfa_exec() must be
-       zero. The only bits  that  may  be  set  are  PCRE_ANCHORED,  PCRE_NEW-
+       The unused bits of the options argument  for  pcre_dfa_exec()  must  be
+       zero.  The  only  bits  that  may  be  set are PCRE_ANCHORED, PCRE_NEW-
        LINE_xxx,        PCRE_NOTBOL,        PCRE_NOTEOL,        PCRE_NOTEMPTY,
-       PCRE_NOTEMPTY_ATSTART,      PCRE_NO_UTF8_CHECK,       PCRE_BSR_ANYCRLF,
-       PCRE_BSR_UNICODE,  PCRE_NO_START_OPTIMIZE, PCRE_PARTIAL_HARD, PCRE_PAR-
-       TIAL_SOFT, PCRE_DFA_SHORTEST, and PCRE_DFA_RESTART.  All but  the  last
-       four  of  these  are  exactly  the  same  as  for pcre_exec(), so their
+       PCRE_NOTEMPTY_ATSTART,       PCRE_NO_UTF8_CHECK,      PCRE_BSR_ANYCRLF,
+       PCRE_BSR_UNICODE, PCRE_NO_START_OPTIMIZE, PCRE_PARTIAL_HARD,  PCRE_PAR-
+       TIAL_SOFT,  PCRE_DFA_SHORTEST,  and PCRE_DFA_RESTART.  All but the last
+       four of these are  exactly  the  same  as  for  pcre_exec(),  so  their
        description is not repeated here.
 
          PCRE_PARTIAL_HARD
          PCRE_PARTIAL_SOFT
 
-       These have the same general effect as they do for pcre_exec(), but  the
-       details  are  slightly  different.  When  PCRE_PARTIAL_HARD  is set for
-       pcre_dfa_exec(), it returns PCRE_ERROR_PARTIAL if the end of  the  sub-
-       ject  is  reached  and there is still at least one matching possibility
+       These  have the same general effect as they do for pcre_exec(), but the
+       details are slightly  different.  When  PCRE_PARTIAL_HARD  is  set  for
+       pcre_dfa_exec(),  it  returns PCRE_ERROR_PARTIAL if the end of the sub-
+       ject is reached and there is still at least  one  matching  possibility
        that requires additional characters. This happens even if some complete
        matches have also been found. When PCRE_PARTIAL_SOFT is set, the return
        code PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if the end
-       of  the  subject  is  reached, there have been no complete matches, but
-       there is still at least one matching possibility. The  portion  of  the
-       string  that  was inspected when the longest partial match was found is
-       set as the first matching string  in  both  cases.   There  is  a  more
-       detailed  discussion  of partial and multi-segment matching, with exam-
+       of the subject is reached, there have been  no  complete  matches,  but
+       there  is  still  at least one matching possibility. The portion of the
+       string that was inspected when the longest partial match was  found  is
+       set  as  the  first  matching  string  in  both cases.  There is a more
+       detailed discussion of partial and multi-segment matching,  with  exam-
        ples, in the pcrepartial documentation.
 
          PCRE_DFA_SHORTEST
 
-       Setting the PCRE_DFA_SHORTEST option causes the matching  algorithm  to
+       Setting  the  PCRE_DFA_SHORTEST option causes the matching algorithm to
        stop as soon as it has found one match. Because of the way the alterna-
-       tive algorithm works, this is necessarily the shortest  possible  match
+       tive  algorithm  works, this is necessarily the shortest possible match
        at the first possible matching point in the subject string.
 
          PCRE_DFA_RESTART
 
        When pcre_dfa_exec() returns a partial match, it is possible to call it
-       again, with additional subject characters, and have  it  continue  with
-       the  same match. The PCRE_DFA_RESTART option requests this action; when
-       it is set, the workspace and wscount options must  reference  the  same
-       vector  as  before  because data about the match so far is left in them
+       again,  with  additional  subject characters, and have it continue with
+       the same match. The PCRE_DFA_RESTART option requests this action;  when
+       it  is  set,  the workspace and wscount options must reference the same
+       vector as before because data about the match so far is  left  in  them
        after a partial match. There is more discussion of this facility in the
        pcrepartial documentation.
 
    Successful returns from pcre_dfa_exec()
 
-       When  pcre_dfa_exec()  succeeds, it may have matched more than one sub-
+       When pcre_dfa_exec() succeeds, it may have matched more than  one  sub-
        string in the subject. Note, however, that all the matches from one run
-       of  the  function  start  at the same point in the subject. The shorter
-       matches are all initial substrings of the longer matches. For  example,
+       of the function start at the same point in  the  subject.  The  shorter
+       matches  are all initial substrings of the longer matches. For example,
        if the pattern
 
          <.*>
@@ -4162,79 +4179,79 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
          <something> <something else>
          <something> <something else> <something further>
 
-       On  success,  the  yield of the function is a number greater than zero,
-       which is the number of matched substrings.  The  substrings  themselves
-       are  returned  in  ovector. Each string uses two elements; the first is
-       the offset to the start, and the second is the offset to  the  end.  In
-       fact,  all  the  strings  have the same start offset. (Space could have
-       been saved by giving this only once, but it was decided to retain  some
-       compatibility  with  the  way pcre_exec() returns data, even though the
+       On success, the yield of the function is a number  greater  than  zero,
+       which  is  the  number of matched substrings. The substrings themselves
+       are returned in ovector. Each string uses two elements;  the  first  is
+       the  offset  to  the start, and the second is the offset to the end. In
+       fact, all the strings have the same start  offset.  (Space  could  have
+       been  saved by giving this only once, but it was decided to retain some
+       compatibility with the way pcre_exec() returns data,  even  though  the
        meaning of the strings is different.)
 
        The strings are returned in reverse order of length; that is, the long-
-       est  matching  string is given first. If there were too many matches to
-       fit into ovector, the yield of the function is zero, and the vector  is
-       filled  with  the  longest matches. Unlike pcre_exec(), pcre_dfa_exec()
+       est matching string is given first. If there were too many  matches  to
+       fit  into ovector, the yield of the function is zero, and the vector is
+       filled with the longest matches.  Unlike  pcre_exec(),  pcre_dfa_exec()
        can use the entire ovector for returning matched strings.
 
-       NOTE: PCRE's "auto-possessification" optimization  usually  applies  to
-       character  repeats at the end of a pattern (as well as internally). For
-       example, the pattern "a\d+" is compiled as if it were  "a\d++"  because
+       NOTE:  PCRE's  "auto-possessification"  optimization usually applies to
+       character repeats at the end of a pattern (as well as internally).  For
+       example,  the  pattern "a\d+" is compiled as if it were "a\d++" because
        there is no point even considering the possibility of backtracking into
-       the repeated digits. For DFA matching, this means that only one  possi-
-       ble  match  is  found.  If  you really do want multiple matches in such
-       cases,  either  use  an  ungreedy   repeat   ("a\d+?")   or   set   the
+       the  repeated digits. For DFA matching, this means that only one possi-
+       ble match is found. If you really do  want  multiple  matches  in  such
+       cases,   either   use   an   ungreedy   repeat  ("a\d+?")  or  set  the
        PCRE_NO_AUTO_POSSESS option when compiling.
 
    Error returns from pcre_dfa_exec()
 
-       The  pcre_dfa_exec()  function returns a negative number when it fails.
-       Many of the errors are the same  as  for  pcre_exec(),  and  these  are
-       described  above.   There are in addition the following errors that are
+       The pcre_dfa_exec() function returns a negative number when  it  fails.
+       Many  of  the  errors  are  the  same as for pcre_exec(), and these are
+       described above.  There are in addition the following errors  that  are
        specific to pcre_dfa_exec():
 
          PCRE_ERROR_DFA_UITEM      (-16)
 
-       This return is given if pcre_dfa_exec() encounters an item in the  pat-
-       tern  that  it  does not support, for instance, the use of \C or a back
+       This  return is given if pcre_dfa_exec() encounters an item in the pat-
+       tern that it does not support, for instance, the use of \C  or  a  back
        reference.
 
          PCRE_ERROR_DFA_UCOND      (-17)
 
-       This return is given if pcre_dfa_exec()  encounters  a  condition  item
-       that  uses  a back reference for the condition, or a test for recursion
+       This  return  is  given  if pcre_dfa_exec() encounters a condition item
+       that uses a back reference for the condition, or a test  for  recursion
        in a specific group. These are not supported.
 
          PCRE_ERROR_DFA_UMLIMIT    (-18)
 
-       This return is given if pcre_dfa_exec() is called with an  extra  block
-       that  contains  a  setting  of the match_limit or match_limit_recursion
-       fields. This is not supported (these fields  are  meaningless  for  DFA
+       This  return  is given if pcre_dfa_exec() is called with an extra block
+       that contains a setting of  the  match_limit  or  match_limit_recursion
+       fields.  This  is  not  supported (these fields are meaningless for DFA
        matching).
 
          PCRE_ERROR_DFA_WSSIZE     (-19)
 
-       This  return  is  given  if  pcre_dfa_exec()  runs  out of space in the
+       This return is given if  pcre_dfa_exec()  runs  out  of  space  in  the
        workspace vector.
 
          PCRE_ERROR_DFA_RECURSE    (-20)
 
-       When a recursive subpattern is processed, the matching  function  calls
-       itself  recursively,  using  private vectors for ovector and workspace.
-       This error is given if the output vector  is  not  large  enough.  This
+       When  a  recursive subpattern is processed, the matching function calls
+       itself recursively, using private vectors for  ovector  and  workspace.
+       This  error  is  given  if  the output vector is not large enough. This
        should be extremely rare, as a vector of size 1000 is used.
 
          PCRE_ERROR_DFA_BADRESTART (-30)
 
-       When  pcre_dfa_exec()  is called with the PCRE_DFA_RESTART option, some
-       plausibility checks are made on the contents of  the  workspace,  which
-       should  contain  data about the previous partial match. If any of these
+       When pcre_dfa_exec() is called with the PCRE_DFA_RESTART  option,  some
+       plausibility  checks  are  made on the contents of the workspace, which
+       should contain data about the previous partial match. If any  of  these
        checks fail, this error is given.
 
 
 SEE ALSO
 
-       pcre16(3),  pcre32(3),  pcrebuild(3),  pcrecallout(3),   pcrecpp(3)(3),
+       pcre16(3),    pcre32(3),   pcrebuild(3),   pcrecallout(3),   pcrecpp(3),
        pcrematching(3), pcrepartial(3), pcreposix(3), pcreprecompile(3), pcre-
        sample(3), pcrestack(3).
 
@@ -4248,8 +4265,8 @@ AUTHOR
 
 REVISION
 
-       Last updated: 12 November 2013
-       Copyright (c) 1997-2013 University of Cambridge.
+       Last updated: 09 February 2014
+       Copyright (c) 1997-2014 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@@ -5510,7 +5527,9 @@ BACKSLASH
 
        Perl  documents  that  the  use  of  \K  within assertions is "not well
        defined". In PCRE, \K is acted upon  when  it  occurs  inside  positive
-       assertions, but is ignored in negative assertions.
+       assertions,  but  is  ignored  in negative assertions. Note that when a
+       pattern such as (?=ab\K) matches, the reported start of the  match  can
+       be greater than the end of the match.
 
    Simple assertions
 
@@ -7399,19 +7418,23 @@ BACKTRACKING CONTROL
 
        Note  that  (*COMMIT)  at  the start of a pattern is not the same as an
        anchor, unless PCRE's start-of-match optimizations are turned  off,  as
-       shown in this pcretest example:
+       shown in this output from pcretest:
 
            re> /(*COMMIT)abc/
          data> xyzabc
           0: abc
-         xyzabc\Y
+         data> xyzabc\Y
          No match
 
-       PCRE  knows  that  any  match  must start with "a", so the optimization
-       skips along the subject to "a" before running the first match  attempt,
-       which  succeeds.  When the optimization is disabled by the \Y escape in
-       the second subject, the match starts at "x" and so the (*COMMIT) causes
-       it to fail without trying any other starting points.
+       For this pattern, PCRE knows that any match must start with "a", so the
+       optimization skips along the subject to "a" before applying the pattern
+       to  the first set of data. The match attempt then succeeds. In the sec-
+       ond set of data, the escape sequence \Y is interpreted by the  pcretest
+       program.  It  causes  the  PCRE_NO_START_OPTIMIZE option to be set when
+       pcre_exec() is called.  This disables the optimization that skips along
+       to the first character. The pattern is now applied starting at "x", and
+       so the (*COMMIT) causes the match to  fail  without  trying  any  other
+       starting points.
 
          (*PRUNE) or (*PRUNE:NAME)
 
@@ -7618,8 +7641,8 @@ AUTHOR
 
 REVISION
 
-       Last updated: 03 December 2013
-       Copyright (c) 1997-2013 University of Cambridge.
+       Last updated: 08 January 2014
+       Copyright (c) 1997-2014 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
@@ -7840,6 +7863,8 @@ MATCH POINT RESET
 
          \K          reset start of match
 
+       \K is honoured in positive assertions, but ignored in negative ones.
+
 
 ALTERNATION
 
@@ -7877,11 +7902,13 @@ OPTION SETTING
          (?x)            extended (ignore white space)
          (?-...)         unset option(s)
 
-       The  following  are  recognized only at the start of a pattern or after
-       one of the newline-setting options with similar syntax:
+       The  following  are  recognized  only at the very start of a pattern or
+       after one of the newline or \R options with similar syntax.  More  than
+       one of them may appear.
 
          (*LIMIT_MATCH=d) set the match limit to d (decimal number)
          (*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
+         (*NO_AUTO_POSSESS) no auto-possessification (PCRE_NO_AUTO_POSSESS)
          (*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
          (*UTF8)         set UTF-8 mode: 8-bit library (PCRE_UTF8)
          (*UTF16)        set UTF-16 mode: 16-bit library (PCRE_UTF16)
@@ -7889,10 +7916,31 @@ OPTION SETTING
          (*UTF)          set appropriate UTF mode for the library in use
          (*UCP)          set PCRE_UCP (use Unicode properties for \d etc)
 
-       Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value  of
+       Note  that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of
        the limits set by the caller of pcre_exec(), not increase them.
 
 
+NEWLINE CONVENTION
+
+       These are recognized only at the very start of  the  pattern  or  after
+       option settings with a similar syntax.
+
+         (*CR)           carriage return only
+         (*LF)           linefeed only
+         (*CRLF)         carriage return followed by linefeed
+         (*ANYCRLF)      all three of the above
+         (*ANY)          any Unicode newline sequence
+
+
+WHAT \R MATCHES
+
+       These  are  recognized  only  at the very start of the pattern or after
+       option settings with a similar syntax.
+
+         (*BSR_ANYCRLF)  CR, LF, or CRLF
+         (*BSR_UNICODE)  any Unicode newline sequence
+
+
 LOOKAHEAD AND LOOKBEHIND ASSERTIONS
 
          (?=...)         positive look ahead
@@ -7960,7 +8008,7 @@ BACKTRACKING CONTROL
          (*FAIL)         force backtrack; synonym (*F)
          (*MARK:NAME)    set name to be passed back; synonym (*:NAME)
 
-       The  following  act only when a subsequent match failure causes a back-
+       The following act only when a subsequent match failure causes  a  back-
        track to reach them. They all force a match failure, but they differ in
        what happens afterwards. Those that advance the start-of-match point do
        so only if the pattern is not anchored.
@@ -7975,27 +8023,6 @@ BACKTRACKING CONTROL
          (*THEN:NAME)    equivalent to (*MARK:NAME)(*THEN)
 
 
-NEWLINE CONVENTIONS
-
-       These are recognized only at the very start of the pattern or  after  a
-       (*BSR_...), (*UTF8), (*UTF16), (*UTF32) or (*UCP) option.
-
-         (*CR)           carriage return only
-         (*LF)           linefeed only
-         (*CRLF)         carriage return followed by linefeed
-         (*ANYCRLF)      all three of the above
-         (*ANY)          any Unicode newline sequence
-
-
-WHAT \R MATCHES
-
-       These  are  recognized only at the very start of the pattern or after a
-       (*...) option that sets the newline convention or a UTF or UCP mode.
-
-         (*BSR_ANYCRLF)  CR, LF, or CRLF
-         (*BSR_UNICODE)  any Unicode newline sequence
-
-
 CALLOUTS
 
          (?C)      callout
@@ -8016,8 +8043,8 @@ AUTHOR
 
 REVISION
 
-       Last updated: 12 November 2013
-       Copyright (c) 1997-2013 University of Cambridge.
+       Last updated: 08 January 2014
+       Copyright (c) 1997-2014 University of Cambridge.
 ------------------------------------------------------------------------------
 
 
index c85f36b6bc62a40f03cc5260128e19130eb8f9cc..ce4251c8e6c67ee65195e37b4c6cb34f3e538852 100644 (file)
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, to be #included by
 applications that call the PCRE functions.
 
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE_MAJOR          8
-#define PCRE_MINOR          34
+#define PCRE_MINOR          35
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2013-12-15
+#define PCRE_DATE           2014-04-04
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@@ -491,36 +491,42 @@ PCRE_EXP_DECL void  (*pcre_free)(void *);
 PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
 PCRE_EXP_DECL void  (*pcre_stack_free)(void *);
 PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
+PCRE_EXP_DECL int   (*pcre_stack_guard)(void);
 
 PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
 PCRE_EXP_DECL void  (*pcre16_free)(void *);
 PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
 PCRE_EXP_DECL void  (*pcre16_stack_free)(void *);
 PCRE_EXP_DECL int   (*pcre16_callout)(pcre16_callout_block *);
+PCRE_EXP_DECL int   (*pcre16_stack_guard)(void);
 
 PCRE_EXP_DECL void *(*pcre32_malloc)(size_t);
 PCRE_EXP_DECL void  (*pcre32_free)(void *);
 PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
 PCRE_EXP_DECL void  (*pcre32_stack_free)(void *);
 PCRE_EXP_DECL int   (*pcre32_callout)(pcre32_callout_block *);
+PCRE_EXP_DECL int   (*pcre32_stack_guard)(void);
 #else   /* VPCOMPAT */
 PCRE_EXP_DECL void *pcre_malloc(size_t);
 PCRE_EXP_DECL void  pcre_free(void *);
 PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
 PCRE_EXP_DECL void  pcre_stack_free(void *);
 PCRE_EXP_DECL int   pcre_callout(pcre_callout_block *);
+PCRE_EXP_DECL int   pcre_stack_guard(void);
 
 PCRE_EXP_DECL void *pcre16_malloc(size_t);
 PCRE_EXP_DECL void  pcre16_free(void *);
 PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
 PCRE_EXP_DECL void  pcre16_stack_free(void *);
 PCRE_EXP_DECL int   pcre16_callout(pcre16_callout_block *);
+PCRE_EXP_DECL int   pcre16_stack_guard(void);
 
 PCRE_EXP_DECL void *pcre32_malloc(size_t);
 PCRE_EXP_DECL void  pcre32_free(void *);
 PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
 PCRE_EXP_DECL void  pcre32_stack_free(void *);
 PCRE_EXP_DECL int   pcre32_callout(pcre32_callout_block *);
+PCRE_EXP_DECL int   pcre32_stack_guard(void);
 #endif  /* VPCOMPAT */
 
 /* User defined callback which provides a stack just before the match starts. */
index 1e20ec29d05ae879be5c1577ac62d86a522f9f3a..78ede56a93fd082853f39c4c6bff75cfa2e0b245 100644 (file)
@@ -20,9 +20,7 @@ and dead code stripping is activated. This leads to link errors. Pulling in the
 header ensures that the array gets flagged as "someone outside this compilation
 unit might reference this" and so it will always be supplied to the linker. */
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index 853fb2479350b72f788363f64b4a85a765a4586e..602b15be7bc24b27b15acbc53b7d8e3e81228297 100644 (file)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 supporting internal functions that are not used by other modules. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #define NLBLOCK cd             /* Block containing newline information */
 #define PSSTART start_pattern  /* Field containing processed string start */
@@ -547,6 +545,8 @@ static const char error_texts[] =
   "parentheses are too deeply nested\0"
   "invalid range in character class\0"
   "group name must start with a non-digit\0"
+  /* 85 */
+  "parentheses are too deeply nested (stack check)\0"
   ;
 
 /* Table to identify digits and hex digits. This is used when compiling
@@ -3070,8 +3070,11 @@ const pcre_uint32 *chr_ptr;
 const pcre_uint32 *ochr_ptr;
 const pcre_uint32 *list_ptr;
 const pcre_uchar *next_code;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+const pcre_uchar *xclass_flags;
+#endif
 const pcre_uint8 *class_bitset;
-const pcre_uint32 *set1, *set2, *set_end;
+const pcre_uint8 *set1, *set2, *set_end;
 pcre_uint32 chr;
 BOOL accepted, invert_bits;
 
@@ -3202,12 +3205,12 @@ for(;;)
     if (base_list[0] == OP_CLASS)
 #endif
       {
-      set1 = (pcre_uint32 *)(base_end - base_list[2]);
+      set1 = (pcre_uint8 *)(base_end - base_list[2]);
       list_ptr = list;
       }
     else
       {
-      set1 = (pcre_uint32 *)(code - list[2]);
+      set1 = (pcre_uint8 *)(code - list[2]);
       list_ptr = base_list;
       }
 
@@ -3216,41 +3219,53 @@ for(;;)
       {
       case OP_CLASS:
       case OP_NCLASS:
-      set2 = (pcre_uint32 *)
+      set2 = (pcre_uint8 *)
         ((list_ptr == list ? code : base_end) - list_ptr[2]);
       break;
 
-      /* OP_XCLASS cannot be supported here, because its bitset
-      is not necessarily complete. E.g: [a-\0x{200}] is stored
-      as a character range, and the appropriate bits are not set. */
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+      case OP_XCLASS:
+      xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
+      if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
+      if ((*xclass_flags & XCL_MAP) == 0)
+        {
+        /* No bits are set for characters < 256. */
+        if (list[1] == 0) return TRUE;
+        /* Might be an empty repeat. */
+        continue;
+        }
+      set2 = (pcre_uint8 *)(xclass_flags + 1);
+      break;
+#endif
 
       case OP_NOT_DIGIT:
-        invert_bits = TRUE;
-        /* Fall through */
+      invert_bits = TRUE;
+      /* Fall through */
       case OP_DIGIT:
-        set2 = (pcre_uint32 *)(cd->cbits + cbit_digit);
-        break;
+      set2 = (pcre_uint8 *)(cd->cbits + cbit_digit);
+      break;
 
       case OP_NOT_WHITESPACE:
-        invert_bits = TRUE;
-        /* Fall through */
+      invert_bits = TRUE;
+      /* Fall through */
       case OP_WHITESPACE:
-        set2 = (pcre_uint32 *)(cd->cbits + cbit_space);
-        break;
+      set2 = (pcre_uint8 *)(cd->cbits + cbit_space);
+      break;
 
       case OP_NOT_WORDCHAR:
-        invert_bits = TRUE;
-        /* Fall through */
+      invert_bits = TRUE;
+      /* Fall through */
       case OP_WORDCHAR:
-        set2 = (pcre_uint32 *)(cd->cbits + cbit_word);
-        break;
+      set2 = (pcre_uint8 *)(cd->cbits + cbit_word);
+      break;
 
       default:
       return FALSE;
       }
 
-    /* Compare 4 bytes to improve speed. */
-    set_end = set1 + (32 / 4);
+    /* Because the sets are unaligned, we need
+    to perform byte comparison here. */
+    set_end = set1 + 32;
     if (invert_bits)
       {
       do
@@ -3551,7 +3566,9 @@ for(;;)
   if (list[1] == 0) return TRUE;
   }
 
-return FALSE;
+/* Control never reaches here. There used to be a fail-safe return FALSE; here,
+but some compilers complain about an unreachable statement. */
+
 }
 
 
@@ -4062,12 +4079,16 @@ for (c = *cptr; c <= d; c++)
 
 if (c > d) return -1;  /* Reached end of range */
 
+/* Found a character that has a single other case. Search for the end of the
+range, which is either the end of the input range, or a character that has zero
+or more than one other cases. */
+
 *ocptr = othercase;
 next = othercase + 1;
 
 for (++c; c <= d; c++)
   {
-  if (UCD_OTHERCASE(c) != next) break;
+  if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
   next++;
   }
 
@@ -4105,6 +4126,7 @@ add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
   compile_data *cd, pcre_uint32 start, pcre_uint32 end)
 {
 pcre_uint32 c;
+pcre_uint32 classbits_end = (end <= 0xff ? end : 0xff);
 int n8 = 0;
 
 /* If caseless matching is required, scan the range and process alternate
@@ -4148,7 +4170,7 @@ if ((options & PCRE_CASELESS) != 0)
 
   /* Not UTF-mode, or no UCP */
 
-  for (c = start; c <= end && c < 256; c++)
+  for (c = start; c <= classbits_end; c++)
     {
     SETBIT(classbits, cd->fcc[c]);
     n8++;
@@ -4173,22 +4195,21 @@ in all cases. */
 
 #endif /* COMPILE_PCRE[8|16] */
 
-/* If all characters are less than 256, use the bit map. Otherwise use extra
-data. */
+/* Use the bitmap for characters < 256. Otherwise use extra data. */
 
-if (end < 0x100)
+for (c = start; c <= classbits_end; c++)
   {
-  for (c = start; c <= end; c++)
-    {
-    n8++;
-    SETBIT(classbits, c);
-    }
+  /* Regardless of start, c will always be <= 255. */
+  SETBIT(classbits, c);
+  n8++;
   }
 
-else
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+if (start <= 0xff) start = 0xff + 1;
+
+if (end >= start)
   {
   pcre_uchar *uchardata = *uchardptr;
-
 #ifdef SUPPORT_UTF
   if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */
     {
@@ -4228,6 +4249,7 @@ else
 
   *uchardptr = uchardata;   /* Updata extra data pointer */
   }
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
 
 return n8;    /* Number of 8-bit characters */
 }
@@ -4449,6 +4471,9 @@ for (;; ptr++)
   BOOL reset_bracount;
   int class_has_8bitchar;
   int class_one_char;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+  BOOL xclass_has_prop;
+#endif
   int newoptions;
   int recno;
   int refsign;
@@ -4783,13 +4808,26 @@ for (;; ptr++)
 
     should_flip_negation = FALSE;
 
+    /* Extended class (xclass) will be used when characters > 255
+    might match. */
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    xclass = FALSE;
+    class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
+    class_uchardata_base = class_uchardata;   /* Save the start */
+#endif
+
     /* For optimization purposes, we track some properties of the class:
     class_has_8bitchar will be non-zero if the class contains at least one <
     256 character; class_one_char will be 1 if the class contains just one
-    character. */
+    character; xclass_has_prop will be TRUE if unicode property checks
+    are present in the class. */
 
     class_has_8bitchar = 0;
     class_one_char = 0;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    xclass_has_prop = FALSE;
+#endif
 
     /* Initialize the 32-char bit map to all zeros. We build the map in a
     temporary bit of memory, in case the class contains fewer than two
@@ -4798,12 +4836,6 @@ for (;; ptr++)
 
     memset(classbits, 0, 32 * sizeof(pcre_uint8));
 
-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
-    xclass = FALSE;
-    class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
-    class_uchardata_base = class_uchardata;   /* Save the start */
-#endif
-
     /* Process characters until ] is reached. By writing this as a "do" it
     means that an initial ] is taken as a data character. At the start of the
     loop, c contains the first byte of the character. */
@@ -4927,6 +4959,7 @@ for (;; ptr++)
             *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
             *class_uchardata++ = ptype;
             *class_uchardata++ = 0;
+            xclass_has_prop = TRUE;
             ptr = tempptr + 1;
             continue;
 
@@ -5109,6 +5142,7 @@ for (;; ptr++)
                 XCL_PROP : XCL_NOTPROP;
               *class_uchardata++ = ptype;
               *class_uchardata++ = pdata;
+              xclass_has_prop = TRUE;
               class_has_8bitchar--;                /* Undo! */
               continue;
               }
@@ -5403,6 +5437,7 @@ for (;; ptr++)
       *code++ = OP_XCLASS;
       code += LINK_SIZE;
       *code = negate_class? XCL_NOT:0;
+      if (xclass_has_prop) *code |= XCL_HASPROP;
 
       /* If the map is required, move up the extra data to make room for it;
       otherwise just move the code pointer to the end of the extra data. */
@@ -5412,6 +5447,8 @@ for (;; ptr++)
         *code++ |= XCL_MAP;
         memmove(code + (32 / sizeof(pcre_uchar)), code,
           IN_UCHARS(class_uchardata - code));
+        if (negate_class && !xclass_has_prop)
+          for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
         memcpy(code, classbits, 32);
         code = class_uchardata + (32 / sizeof(pcre_uchar));
         }
@@ -6580,7 +6617,10 @@ for (;; ptr++)
 
         code[1+LINK_SIZE] = OP_CREF;
         skipbytes = 1+IMM2_SIZE;
-        refsign = -1;
+        refsign = -1;     /* => not a number */
+        namelen = -1;     /* => not a name; must set to avoid warning */
+        name = NULL;      /* Always set to avoid warning */
+        recno = 0;        /* Always set to avoid warning */
 
         /* Check for a test for recursion in a named group. */
 
@@ -6617,7 +6657,6 @@ for (;; ptr++)
 
         if (refsign >= 0)
           {
-          recno = 0;
           while (IS_DIGIT(*ptr))
             {
             recno = recno * 10 + (int)(*ptr - CHAR_0);
@@ -7994,6 +8033,16 @@ unsigned int orig_bracount;
 unsigned int max_bracount;
 branch_chain bc;
 
+/* If set, call the external function that checks for stack availability. */
+
+if (PUBL(stack_guard) != NULL && PUBL(stack_guard)())
+  {
+  *errorcodeptr= ERR85;
+  return FALSE;
+  }
+
+/* Miscellaneous initialization */
+
 bc.outer = bcptr;
 bc.current_branch = code;
 
index 1cbdd9c960cec53fe064151aa63c154e4ccec14e..92e3f0ebdb037a57d95158c8465d405e5068cbea 100644 (file)
@@ -41,9 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* This module contains the external function pcre_config(). */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 /* Keep the original link size. */
 static int real_link_size = LINK_SIZE;
index a3f0c1923f255dacb16d06ec6d8ba251452c4a67..ab58735d5c2832818a8599ad50fa95c5cfee7812 100644 (file)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -41,9 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
 possible. There are also some static supporting functions. */
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #define NLBLOCK md             /* Block containing newline information */
 #define PSSTART start_subject  /* Field containing processed string start */
@@ -134,7 +132,7 @@ pcre_uint32 c;
 BOOL utf = md->utf;
 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
 while (length-- > 0)
-  if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
+  if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
 }
 #endif
 
@@ -237,8 +235,8 @@ if (caseless)
       {
       pcre_uint32 cc, cp;
       if (eptr >= md->end_subject) return -2;   /* Partial match */
-      cc = RAWUCHARTEST(eptr);
-      cp = RAWUCHARTEST(p);
+      cc = UCHAR21TEST(eptr);
+      cp = UCHAR21TEST(p);
       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
       p++;
       eptr++;
@@ -254,7 +252,7 @@ else
   while (length-- > 0)
     {
     if (eptr >= md->end_subject) return -2;   /* Partial match */
-    if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
+    if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
     }
   }
 
@@ -2103,7 +2101,7 @@ for (;;)
             eptr + 1 >= md->end_subject &&
             NLBLOCK->nltype == NLTYPE_FIXED &&
             NLBLOCK->nllen == 2 &&
-            RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
+            UCHAR21TEST(eptr) == NLBLOCK->nl[0])
           {
           md->hitend = TRUE;
           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2147,7 +2145,7 @@ for (;;)
           eptr + 1 >= md->end_subject &&
           NLBLOCK->nltype == NLTYPE_FIXED &&
           NLBLOCK->nllen == 2 &&
-          RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
+          UCHAR21TEST(eptr) == NLBLOCK->nl[0])
         {
         md->hitend = TRUE;
         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2290,7 +2288,7 @@ for (;;)
         eptr + 1 >= md->end_subject &&
         NLBLOCK->nltype == NLTYPE_FIXED &&
         NLBLOCK->nllen == 2 &&
-        RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
+        UCHAR21TEST(eptr) == NLBLOCK->nl[0])
       {
       md->hitend = TRUE;
       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2444,7 +2442,7 @@ for (;;)
         {
         SCHECK_PARTIAL();
         }
-      else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
+      else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
       break;
 
       case CHAR_LF:
@@ -2691,16 +2689,22 @@ for (;;)
       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
       ecode += 1 + 2*IMM2_SIZE;
 
+      /* Setting the default length first and initializing 'offset' avoids
+      compiler warnings in the REF_REPEAT code. */
+
+      length = (md->jscript_compat)? 0 : -1;
+      offset = 0;
+
       while (count-- > 0)
         {
         offset = GET2(slot, 0) << 1;
-        if (offset < offset_top && md->offset_vector[offset] >= 0) break;
+        if (offset < offset_top && md->offset_vector[offset] >= 0)
+          {
+          length = md->offset_vector[offset+1] - md->offset_vector[offset];
+          break;
+          }
         slot += md->name_entry_size;
         }
-      if (count < 0)
-        length = (md->jscript_compat)? 0 : -1;
-      else
-        length = md->offset_vector[offset+1] - md->offset_vector[offset];
       }
     goto REF_REPEAT;
 
@@ -3212,7 +3216,7 @@ for (;;)
         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
         RRETURN(MATCH_NOMATCH);
         }
-      while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
+      while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
       }
     else
 #endif
@@ -3252,7 +3256,7 @@ for (;;)
 
       if (fc < 128)
         {
-        pcre_uint32 cc = RAWUCHAR(eptr);
+        pcre_uint32 cc = UCHAR21(eptr);
         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
         ecode++;
         eptr++;
@@ -3521,7 +3525,7 @@ for (;;)
           SCHECK_PARTIAL();
           RRETURN(MATCH_NOMATCH);
           }
-        cc = RAWUCHARTEST(eptr);
+        cc = UCHAR21TEST(eptr);
         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
         eptr++;
         }
@@ -3539,7 +3543,7 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = RAWUCHARTEST(eptr);
+          cc = UCHAR21TEST(eptr);
           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
           eptr++;
           }
@@ -3556,7 +3560,7 @@ for (;;)
             SCHECK_PARTIAL();
             break;
             }
-          cc = RAWUCHARTEST(eptr);
+          cc = UCHAR21TEST(eptr);
           if (fc != cc && foc != cc) break;
           eptr++;
           }
@@ -3583,7 +3587,7 @@ for (;;)
           SCHECK_PARTIAL();
           RRETURN(MATCH_NOMATCH);
           }
-        if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
+        if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
         }
 
       if (min == max) continue;
@@ -3600,7 +3604,7 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
+          if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
         }
@@ -3614,7 +3618,7 @@ for (;;)
             SCHECK_PARTIAL();
             break;
             }
-          if (fc != RAWUCHARTEST(eptr)) break;
+          if (fc != UCHAR21TEST(eptr)) break;
           eptr++;
           }
         if (possessive) continue;    /* No backtracking */
@@ -4369,7 +4373,7 @@ for (;;)
               eptr + 1 >= md->end_subject &&
               NLBLOCK->nltype == NLTYPE_FIXED &&
               NLBLOCK->nllen == 2 &&
-              RAWUCHAR(eptr) == NLBLOCK->nl[0])
+              UCHAR21(eptr) == NLBLOCK->nl[0])
             {
             md->hitend = TRUE;
             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -4411,7 +4415,7 @@ for (;;)
             default: RRETURN(MATCH_NOMATCH);
 
             case CHAR_CR:
-            if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
+            if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
             break;
 
             case CHAR_LF:
@@ -4521,7 +4525,7 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = RAWUCHAR(eptr);
+          cc = UCHAR21(eptr);
           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4538,7 +4542,7 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = RAWUCHAR(eptr);
+          cc = UCHAR21(eptr);
           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4555,7 +4559,7 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = RAWUCHAR(eptr);
+          cc = UCHAR21(eptr);
           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4572,7 +4576,7 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = RAWUCHAR(eptr);
+          cc = UCHAR21(eptr);
           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4589,7 +4593,7 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = RAWUCHAR(eptr);
+          cc = UCHAR21(eptr);
           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -5150,7 +5154,7 @@ for (;;)
               {
               default: RRETURN(MATCH_NOMATCH);
               case CHAR_CR:
-              if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
+              if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
               break;
 
               case CHAR_LF:
@@ -5689,7 +5693,7 @@ for (;;)
                   eptr + 1 >= md->end_subject &&
                   NLBLOCK->nltype == NLTYPE_FIXED &&
                   NLBLOCK->nllen == 2 &&
-                  RAWUCHAR(eptr) == NLBLOCK->nl[0])
+                  UCHAR21(eptr) == NLBLOCK->nl[0])
                 {
                 md->hitend = TRUE;
                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5715,7 +5719,7 @@ for (;;)
                   eptr + 1 >= md->end_subject &&
                   NLBLOCK->nltype == NLTYPE_FIXED &&
                   NLBLOCK->nllen == 2 &&
-                  RAWUCHAR(eptr) == NLBLOCK->nl[0])
+                  UCHAR21(eptr) == NLBLOCK->nl[0])
                 {
                 md->hitend = TRUE;
                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5772,7 +5776,7 @@ for (;;)
             if (c == CHAR_CR)
               {
               if (++eptr >= md->end_subject) break;
-              if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
+              if (UCHAR21(eptr) == CHAR_LF) eptr++;
               }
             else
               {
@@ -5935,8 +5939,8 @@ for (;;)
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           eptr--;
           BACKCHAR(eptr);
-          if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
-              RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
+          if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
+              UCHAR21(eptr - 1) == CHAR_CR) eptr--;
           }
         }
       else
@@ -6783,10 +6787,10 @@ for(;;)
 
       if (first_char != first_char2)
         while (start_match < end_subject &&
-          (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
+          (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
           start_match++;
       else
-        while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
+        while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
           start_match++;
       }
 
@@ -6818,7 +6822,7 @@ for(;;)
         if (start_match[-1] == CHAR_CR &&
              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
              start_match < end_subject &&
-             RAWUCHARTEST(start_match) == CHAR_NL)
+             UCHAR21TEST(start_match) == CHAR_NL)
           start_match++;
         }
       }
@@ -6829,22 +6833,12 @@ for(;;)
       {
       while (start_match < end_subject)
         {
-        register pcre_uint32 c = RAWUCHARTEST(start_match);
+        register pcre_uint32 c = UCHAR21TEST(start_match);
 #ifndef COMPILE_PCRE8
         if (c > 255) c = 255;
 #endif
-        if ((start_bits[c/8] & (1 << (c&7))) == 0)
-          {
-          start_match++;
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
-          /* In non 8-bit mode, the iteration will stop for
-          characters > 255 at the beginning or not stop at all. */
-          if (utf)
-            ACROSSCHAR(start_match < end_subject, *start_match,
-              start_match++);
-#endif
-          }
-        else break;
+        if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
+        start_match++;
         }
       }
     }   /* Starting optimizations */
@@ -6897,7 +6891,7 @@ for(;;)
           {
           while (p < end_subject)
             {
-            register pcre_uint32 pp = RAWUCHARINCTEST(p);
+            register pcre_uint32 pp = UCHAR21INCTEST(p);
             if (pp == req_char || pp == req_char2) { p--; break; }
             }
           }
@@ -6905,7 +6899,7 @@ for(;;)
           {
           while (p < end_subject)
             {
-            if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
+            if (UCHAR21INCTEST(p) == req_char) { p--; break; }
             }
           }
 
index a6c2ece6ca5a71ef382e6339875a682e6c76498a..3af0cec60b1f3cba920dd9c15f635f5a4ef7dfd3 100644 (file)
@@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 information about a compiled pattern. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index 8094b34bbfb7768bc3be312a2f9b5c6c245ddd5c..e7ea3a56a9c67d26dcda0388536a2d79663608c2 100644 (file)
@@ -43,9 +43,7 @@ from the subject string after a regex match has succeeded. The original idea
 for these functions came from Scott Wimer. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index 36e6ddb3a89536356619c25a3743f499b714c5cb..4aab651c526e245c38ff1a23f0baa47dbf0a52a2 100644 (file)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -52,9 +52,7 @@ a local function is used.
 Also, when compiling for Virtual Pascal, things are done differently, and
 global variables are not used. */
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
@@ -72,6 +70,7 @@ PCRE_EXP_DATA_DEFN void  (*PUBL(free))(void *) = LocalPcreFree;
 PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
 PCRE_EXP_DATA_DEFN void  (*PUBL(stack_free))(void *) = LocalPcreFree;
 PCRE_EXP_DATA_DEFN int   (*PUBL(callout))(PUBL(callout_block) *) = NULL;
+PCRE_EXP_DATA_DEFN int   (*PUBL(stack_guard))(void) = NULL;
 
 #elif !defined VPCOMPAT
 PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
@@ -79,6 +78,7 @@ PCRE_EXP_DATA_DEFN void  (*PUBL(free))(void *) = free;
 PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
 PCRE_EXP_DATA_DEFN void  (*PUBL(stack_free))(void *) = free;
 PCRE_EXP_DATA_DEFN int   (*PUBL(callout))(PUBL(callout_block) *) = NULL;
+PCRE_EXP_DATA_DEFN int   (*PUBL(stack_guard))(void) = NULL;
 #endif
 
 /* End of pcre_globals.c */
index 0b9798c5541abde5e049bc6d404bbd93ed59f5b6..6e915a0e4530d287da207b516b56f228309426b2 100644 (file)
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -316,8 +316,8 @@ start/end of string field names are. */
        &(NLBLOCK->nllen), utf)) \
     : \
     ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
-     RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
-     (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1])       \
+     UCHAR21TEST(p) == NLBLOCK->nl[0] && \
+     (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1])       \
     ) \
   )
 
@@ -330,8 +330,8 @@ start/end of string field names are. */
        &(NLBLOCK->nllen), utf)) \
     : \
     ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
-     RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
-     (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
+     UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
+     (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
     ) \
   )
 
@@ -582,12 +582,27 @@ changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
 #define MAX_MARK ((1u << 8) - 1)
 #endif
 
+/* There is a proposed future special "UTF-21" mode, in which only the lowest
+21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
+high-order bits available to the application for other uses. In preparation for
+the future implementation of this mode, there are macros that load a data item
+and, if in this special mode, mask it to 21 bits. These macros all have names
+starting with UCHAR21. In all other modes, including the normal 32-bit
+library, the macros all have the same simple definitions. When the new mode is
+implemented, it is expected that these definitions will be varied appropriately
+using #ifdef when compiling the library that supports the special mode. */
+
+#define UCHAR21(eptr)        (*(eptr))
+#define UCHAR21TEST(eptr)    (*(eptr))
+#define UCHAR21INC(eptr)     (*(eptr)++)
+#define UCHAR21INCTEST(eptr) (*(eptr)++)
+
 /* When UTF encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
-and other macros are not used when UTF is not supported, so they are not
-defined. To make sure they can never even appear when UTF support is omitted,
-we don't even define them. */
+byte in 8-bit mode or a single short in 16-bit mode. The macros for character
+handling generate simple sequences when used in the basic mode, and more
+complicated ones for UTF characters. GETCHARLENTEST and other macros are not
+used when UTF is not supported. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */
 
 #ifndef SUPPORT_UTF
 
@@ -600,10 +615,6 @@ we don't even define them. */
 #define GETCHARINC(c, eptr) c = *eptr++;
 #define GETCHARINCTEST(c, eptr) c = *eptr++;
 #define GETCHARLEN(c, eptr, len) c = *eptr;
-#define RAWUCHAR(eptr) (*(eptr))
-#define RAWUCHARINC(eptr) (*(eptr)++)
-#define RAWUCHARTEST(eptr) (*(eptr))
-#define RAWUCHARINCTEST(eptr) (*(eptr)++)
 /* #define GETCHARLENTEST(c, eptr, len) */
 /* #define BACKCHAR(eptr) */
 /* #define FORWARDCHAR(eptr) */
@@ -776,30 +787,6 @@ do not know if we are in UTF-8 mode. */
   c = *eptr; \
   if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
 
-/* Returns the next uchar, not advancing the pointer. This is called when
-we know we are in UTF mode. */
-
-#define RAWUCHAR(eptr) \
-  (*(eptr))
-
-/* Returns the next uchar, advancing the pointer. This is called when
-we know we are in UTF mode. */
-
-#define RAWUCHARINC(eptr) \
-  (*((eptr)++))
-
-/* Returns the next uchar, testing for UTF mode, and not advancing the
-pointer. */
-
-#define RAWUCHARTEST(eptr) \
-  (*(eptr))
-
-/* Returns the next uchar, testing for UTF mode, advancing the
-pointer. */
-
-#define RAWUCHARINCTEST(eptr) \
-  (*((eptr)++))
-
 /* If the pointer is not at the start of a character, move it back until
 it is. This is called only in UTF-8 mode - we don't put a test within the macro
 because almost all calls are already within a block of UTF-8 only code. */
@@ -895,30 +882,6 @@ we do not know if we are in UTF-16 mode. */
   c = *eptr; \
   if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
 
-/* Returns the next uchar, not advancing the pointer. This is called when
-we know we are in UTF mode. */
-
-#define RAWUCHAR(eptr) \
-  (*(eptr))
-
-/* Returns the next uchar, advancing the pointer. This is called when
-we know we are in UTF mode. */
-
-#define RAWUCHARINC(eptr) \
-  (*((eptr)++))
-
-/* Returns the next uchar, testing for UTF mode, and not advancing the
-pointer. */
-
-#define RAWUCHARTEST(eptr) \
-  (*(eptr))
-
-/* Returns the next uchar, testing for UTF mode, advancing the
-pointer. */
-
-#define RAWUCHARINCTEST(eptr) \
-  (*((eptr)++))
-
 /* If the pointer is not at the start of a character, move it back until
 it is. This is called only in UTF-16 mode - we don't put a test within the
 macro because almost all calls are already within a block of UTF-16 only
@@ -980,30 +943,6 @@ This is called when we do not know if we are in UTF-32 mode. */
 #define GETCHARLENTEST(c, eptr, len) \
   GETCHARTEST(c, eptr)
 
-/* Returns the next uchar, not advancing the pointer. This is called when
-we know we are in UTF mode. */
-
-#define RAWUCHAR(eptr) \
-  (*(eptr))
-
-/* Returns the next uchar, advancing the pointer. This is called when
-we know we are in UTF mode. */
-
-#define RAWUCHARINC(eptr) \
-  (*((eptr)++))
-
-/* Returns the next uchar, testing for UTF mode, and not advancing the
-pointer. */
-
-#define RAWUCHARTEST(eptr) \
-  (*(eptr))
-
-/* Returns the next uchar, testing for UTF mode, advancing the
-pointer. */
-
-#define RAWUCHARINCTEST(eptr) \
-  (*((eptr)++))
-
 /* If the pointer is not at the start of a character, move it back until
 it is. This is called only in UTF-32 mode - we don't put a test within the
 macro because almost all calls are already within a block of UTF-32 only
@@ -1874,8 +1813,9 @@ table. */
 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
 contain characters with values greater than 255. */
 
-#define XCL_NOT    0x01    /* Flag: this is a negative class */
-#define XCL_MAP    0x02    /* Flag: a 32-byte map is present */
+#define XCL_NOT       0x01    /* Flag: this is a negative class */
+#define XCL_MAP       0x02    /* Flag: a 32-byte map is present */
+#define XCL_HASPROP   0x04    /* Flag: property checks are present. */
 
 #define XCL_END       0    /* Marks end of individual items */
 #define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
@@ -2341,7 +2281,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
        ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
        ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
-       ERR80, ERR81, ERR82, ERR83, ERR84, ERRCOUNT };
+       ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERRCOUNT };
 
 /* JIT compiling modes. The function list is indexed by them. */
 
diff --git a/ext/pcre/pcrelib/pcre_jit_compile.c b/ext/pcre/pcrelib/pcre_jit_compile.c
new file mode 100644 (file)
index 0000000..3799cd9
--- /dev/null
@@ -0,0 +1,10699 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2013 University of Cambridge
+
+  The machine code generator part (this module) was written by Zoltan Herczeg
+                      Copyright (c) 2010-2013
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#include "config.h"
+
+#include "pcre_internal.h"
+
+#if defined SUPPORT_JIT
+
+/* All-in-one: Since we use the JIT compiler only from here,
+we just include it. This way we don't need to touch the build
+system files. */
+
+#define SLJIT_MALLOC(size) (PUBL(malloc))(size)
+#define SLJIT_FREE(ptr) (PUBL(free))(ptr)
+#define SLJIT_CONFIG_AUTO 1
+#define SLJIT_CONFIG_STATIC 1
+#define SLJIT_VERBOSE 0
+#define SLJIT_DEBUG 0
+
+#include "sljit/sljitLir.c"
+
+#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
+#error Unsupported architecture
+#endif
+
+/* Defines for debugging purposes. */
+
+/* 1 - Use unoptimized capturing brackets.
+   2 - Enable capture_last_ptr (includes option 1). */
+/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
+
+/* 1 - Always have a control head. */
+/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
+
+/* Allocate memory for the regex stack on the real machine stack.
+Fast, but limited size. */
+#define MACHINE_STACK_SIZE 32768
+
+/* Growth rate for stack allocated by the OS. Should be a multiple
+of the page size. */
+#define STACK_GROWTH_RATE 8192
+
+/* Enable to check that the allocation could destroy temporaries. */
+#if defined SLJIT_DEBUG && SLJIT_DEBUG
+#define DESTROY_REGISTERS 1
+#endif
+
+/*
+Short summary about the backtracking mechanism employed by the jit code generator:
+
+The code generator follows the recursive nature of the PERL compatible regular
+expressions. The basic blocks of regular expressions are condition checkers
+which execute different commands depending on the result of the condition check.
+The relationship between the operators can be horizontal (concatenation) and
+vertical (sub-expression) (See struct backtrack_common for more details).
+
+  'ab' - 'a' and 'b' regexps are concatenated
+  'a+' - 'a' is the sub-expression of the '+' operator
+
+The condition checkers are boolean (true/false) checkers. Machine code is generated
+for the checker itself and for the actions depending on the result of the checker.
+The 'true' case is called the matching path (expected path), and the other is called
+the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
+branches on the matching path.
+
+ Greedy star operator (*) :
+   Matching path: match happens.
+   Backtrack path: match failed.
+ Non-greedy star operator (*?) :
+   Matching path: no need to perform a match.
+   Backtrack path: match is required.
+
+The following example shows the code generated for a capturing bracket
+with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
+we have the following regular expression:
+
+   A(B|C)D
+
+The generated code will be the following:
+
+ A matching path
+ '(' matching path (pushing arguments to the stack)
+ B matching path
+ ')' matching path (pushing arguments to the stack)
+ D matching path
+ return with successful match
+
+ D backtrack path
+ ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
+ B backtrack path
+ C expected path
+ jump to D matching path
+ C backtrack path
+ A backtrack path
+
+ Notice that the order of the backtrack code paths is the opposite of the fast
+ code paths. In this way the topmost value on the stack always belongs
+ to the current backtrack code path. The backtrack path must check
+ whether there is a next alternative. If so, it needs to jump back to
+ the matching path eventually. Otherwise it needs to clear out its own stack
+ frame and continue the execution on the backtrack code paths.
+*/
+
+/*
+Saved stack frames:
+
+Atomic blocks and asserts require reloading the values of private data
+when the backtrack mechanism is performed. Because of OP_RECURSE, the data
+are not necessarily known at compile time, thus we need a dynamic restore
+mechanism.
+
+The stack frames are stored in a chain list, and have the following format:
+([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
+
+Thus we can restore the private data to a particular point in the stack.
+*/
+
+typedef struct jit_arguments {
+  /* Pointers first. */
+  struct sljit_stack *stack;
+  const pcre_uchar *str;
+  const pcre_uchar *begin;
+  const pcre_uchar *end;
+  int *offsets;
+  pcre_uchar *uchar_ptr;
+  pcre_uchar *mark_ptr;
+  void *callout_data;
+  /* Everything else after. */
+  pcre_uint32 limit_match;
+  int real_offset_count;
+  int offset_count;
+  pcre_uint8 notbol;
+  pcre_uint8 noteol;
+  pcre_uint8 notempty;
+  pcre_uint8 notempty_atstart;
+} jit_arguments;
+
+typedef struct executable_functions {
+  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
+  sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
+  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
+  PUBL(jit_callback) callback;
+  void *userdata;
+  pcre_uint32 top_bracket;
+  pcre_uint32 limit_match;
+} executable_functions;
+
+typedef struct jump_list {
+  struct sljit_jump *jump;
+  struct jump_list *next;
+} jump_list;
+
+typedef struct stub_list {
+  struct sljit_jump *start;
+  struct sljit_label *quit;
+  struct stub_list *next;
+} stub_list;
+
+typedef struct label_addr_list {
+  struct sljit_label *label;
+  sljit_uw *addr;
+  struct label_addr_list *next;
+} label_addr_list;
+
+enum frame_types {
+  no_frame = -1,
+  no_stack = -2
+};
+
+enum control_types {
+  type_mark = 0,
+  type_then_trap = 1
+};
+
+typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
+
+/* The following structure is the key data type for the recursive
+code generator. It is allocated by compile_matchingpath, and contains
+the arguments for compile_backtrackingpath. Must be the first member
+of its descendants. */
+typedef struct backtrack_common {
+  /* Concatenation stack. */
+  struct backtrack_common *prev;
+  jump_list *nextbacktracks;
+  /* Internal stack (for component operators). */
+  struct backtrack_common *top;
+  jump_list *topbacktracks;
+  /* Opcode pointer. */
+  pcre_uchar *cc;
+} backtrack_common;
+
+typedef struct assert_backtrack {
+  backtrack_common common;
+  jump_list *condfailed;
+  /* Less than 0 if a frame is not needed. */
+  int framesize;
+  /* Points to our private memory word on the stack. */
+  int private_data_ptr;
+  /* For iterators. */
+  struct sljit_label *matchingpath;
+} assert_backtrack;
+
+typedef struct bracket_backtrack {
+  backtrack_common common;
+  /* Where to continue if an alternative is successfully matched. */
+  struct sljit_label *alternative_matchingpath;
+  /* For rmin and rmax iterators. */
+  struct sljit_label *recursive_matchingpath;
+  /* For greedy ? operator. */
+  struct sljit_label *zero_matchingpath;
+  /* Contains the branches of a failed condition. */
+  union {
+    /* Both for OP_COND, OP_SCOND. */
+    jump_list *condfailed;
+    assert_backtrack *assert;
+    /* For OP_ONCE. Less than 0 if not needed. */
+    int framesize;
+  } u;
+  /* Points to our private memory word on the stack. */
+  int private_data_ptr;
+} bracket_backtrack;
+
+typedef struct bracketpos_backtrack {
+  backtrack_common common;
+  /* Points to our private memory word on the stack. */
+  int private_data_ptr;
+  /* Reverting stack is needed. */
+  int framesize;
+  /* Allocated stack size. */
+  int stacksize;
+} bracketpos_backtrack;
+
+typedef struct braminzero_backtrack {
+  backtrack_common common;
+  struct sljit_label *matchingpath;
+} braminzero_backtrack;
+
+typedef struct iterator_backtrack {
+  backtrack_common common;
+  /* Next iteration. */
+  struct sljit_label *matchingpath;
+} iterator_backtrack;
+
+typedef struct recurse_entry {
+  struct recurse_entry *next;
+  /* Contains the function entry. */
+  struct sljit_label *entry;
+  /* Collects the calls until the function is created. */
+  jump_list *calls;
+  /* Points to the starting opcode. */
+  sljit_sw start;
+} recurse_entry;
+
+typedef struct recurse_backtrack {
+  backtrack_common common;
+  BOOL inlined_pattern;
+} recurse_backtrack;
+
+#define OP_THEN_TRAP OP_TABLE_LENGTH
+
+typedef struct then_trap_backtrack {
+  backtrack_common common;
+  /* If then_trap is not NULL, this structure contains the real
+  then_trap for the backtracking path. */
+  struct then_trap_backtrack *then_trap;
+  /* Points to the starting opcode. */
+  sljit_sw start;
+  /* Exit point for the then opcodes of this alternative. */
+  jump_list *quit;
+  /* Frame size of the current alternative. */
+  int framesize;
+} then_trap_backtrack;
+
+#define MAX_RANGE_SIZE 4
+
+typedef struct compiler_common {
+  /* The sljit generic compiler. */
+  struct sljit_compiler *compiler;
+  /* First byte code. */
+  pcre_uchar *start;
+  /* Maps private data offset to each opcode. */
+  sljit_si *private_data_ptrs;
+  /* This read-only data is available during runtime. */
+  sljit_uw *read_only_data;
+  /* The total size of the read-only data. */
+  sljit_uw read_only_data_size;
+  /* The next free entry of the read_only_data. */
+  sljit_uw *read_only_data_ptr;
+  /* Tells whether the capturing bracket is optimized. */
+  pcre_uint8 *optimized_cbracket;
+  /* Tells whether the starting offset is a target of then. */
+  pcre_uint8 *then_offsets;
+  /* Current position where a THEN must jump. */
+  then_trap_backtrack *then_trap;
+  /* Starting offset of private data for capturing brackets. */
+  int cbra_ptr;
+  /* Output vector starting point. Must be divisible by 2. */
+  int ovector_start;
+  /* Last known position of the requested byte. */
+  int req_char_ptr;
+  /* Head of the last recursion. */
+  int recursive_head_ptr;
+  /* First inspected character for partial matching. */
+  int start_used_ptr;
+  /* Starting pointer for partial soft matches. */
+  int hit_start;
+  /* End pointer of the first line. */
+  int first_line_end;
+  /* Points to the marked string. */
+  int mark_ptr;
+  /* Recursive control verb management chain. */
+  int control_head_ptr;
+  /* Points to the last matched capture block index. */
+  int capture_last_ptr;
+  /* Points to the starting position of the current match. */
+  int start_ptr;
+
+  /* Flipped and lower case tables. */
+  const pcre_uint8 *fcc;
+  sljit_sw lcc;
+  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
+  int mode;
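+  /* TRUE, when minlength is greater than 0. NOTE(review): the field name suggests the opposite (minlength == 0) - confirm against the assignment. */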
+  BOOL might_be_empty;
+  /* \K is found in the pattern. */
+  BOOL has_set_som;
+  /* (*SKIP:arg) is found in the pattern. */
+  BOOL has_skip_arg;
+  /* (*THEN) is found in the pattern. */
+  BOOL has_then;
+  /* Needs to know the start position anytime. */
+  BOOL needs_start_ptr;
+  /* Currently in recurse or negative assert. */
+  BOOL local_exit;
+  /* Currently in a positive assert. */
+  BOOL positive_assert;
+  /* Newline control. */
+  int nltype;
+  pcre_uint32 nlmax;
+  pcre_uint32 nlmin;
+  int newline;
+  int bsr_nltype;
+  pcre_uint32 bsr_nlmax;
+  pcre_uint32 bsr_nlmin;
+  /* Dollar endonly. */
+  int endonly;
+  /* Tables. */
+  sljit_sw ctypes;
+  /* Named capturing brackets. */
+  pcre_uchar *name_table;
+  sljit_sw name_count;
+  sljit_sw name_entry_size;
+
+  /* Labels and jump lists. */
+  struct sljit_label *partialmatchlabel;
+  struct sljit_label *quit_label;
+  struct sljit_label *forced_quit_label;
+  struct sljit_label *accept_label;
+  struct sljit_label *ff_newline_shortcut;
+  stub_list *stubs;
+  label_addr_list *label_addrs;
+  recurse_entry *entries;
+  recurse_entry *currententry;
+  jump_list *partialmatch;
+  jump_list *quit;
+  jump_list *positive_assert_quit;
+  jump_list *forced_quit;
+  jump_list *accept;
+  jump_list *calllimit;
+  jump_list *stackalloc;
+  jump_list *revertframes;
+  jump_list *wordboundary;
+  jump_list *anynewline;
+  jump_list *hspace;
+  jump_list *vspace;
+  jump_list *casefulcmp;
+  jump_list *caselesscmp;
+  jump_list *reset_match;
+  BOOL jscript_compat;
+#ifdef SUPPORT_UTF
+  BOOL utf;
+#ifdef SUPPORT_UCP
+  BOOL use_ucp;
+#endif
+#ifdef COMPILE_PCRE8
+  jump_list *utfreadchar;
+  jump_list *utfreadchar16;
+  jump_list *utfreadtype8;
+#endif
+#endif /* SUPPORT_UTF */
+#ifdef SUPPORT_UCP
+  jump_list *getucd;
+#endif
+} compiler_common;
+
+/* For byte_sequence_compare. */
+
+typedef struct compare_context {
+  int length;
+  int sourcereg;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  int ucharptr;
+  union {
+    sljit_si asint;
+    sljit_uh asushort;
+#if defined COMPILE_PCRE8
+    sljit_ub asbyte;
+    sljit_ub asuchars[4];
+#elif defined COMPILE_PCRE16
+    sljit_uh asuchars[2];
+#elif defined COMPILE_PCRE32
+    sljit_ui asuchars[1];
+#endif
+  } c;
+  union {
+    sljit_si asint;
+    sljit_uh asushort;
+#if defined COMPILE_PCRE8
+    sljit_ub asbyte;
+    sljit_ub asuchars[4];
+#elif defined COMPILE_PCRE16
+    sljit_uh asuchars[2];
+#elif defined COMPILE_PCRE32
+    sljit_ui asuchars[1];
+#endif
+  } oc;
+#endif
+} compare_context;
+
+/* Undefine sljit macros. */
+#undef CMP
+
+/* Used for accessing the elements of the stack. */
+#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
+
+#define TMP1          SLJIT_SCRATCH_REG1
+#define TMP2          SLJIT_SCRATCH_REG3
+#define TMP3          SLJIT_TEMPORARY_EREG2
+#define STR_PTR       SLJIT_SAVED_REG1
+#define STR_END       SLJIT_SAVED_REG2
+#define STACK_TOP     SLJIT_SCRATCH_REG2
+#define STACK_LIMIT   SLJIT_SAVED_REG3
+#define ARGUMENTS     SLJIT_SAVED_EREG1
+#define COUNT_MATCH   SLJIT_SAVED_EREG2
+#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
+
+/* Local space layout. */
+/* These two locals can be used by the current opcode. */
+#define LOCALS0          (0 * sizeof(sljit_sw))
+#define LOCALS1          (1 * sizeof(sljit_sw))
+/* Two local variables for possessive quantifiers (char1 cannot use them). */
+#define POSSESSIVE0      (2 * sizeof(sljit_sw))
+#define POSSESSIVE1      (3 * sizeof(sljit_sw))
+/* Max limit of recursions. */
+#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
+/* The output vector is stored on the stack, and contains pointers
+to characters. The vector data is divided into two groups: the first
+group contains the start / end character pointers, and the second is
+the start pointers when the end of the capturing group has not yet been reached. */
+#define OVECTOR_START    (common->ovector_start)
+#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
+#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
+#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
+
+#if defined COMPILE_PCRE8
+#define MOV_UCHAR  SLJIT_MOV_UB
+#define MOVU_UCHAR SLJIT_MOVU_UB
+#elif defined COMPILE_PCRE16
+#define MOV_UCHAR  SLJIT_MOV_UH
+#define MOVU_UCHAR SLJIT_MOVU_UH
+#elif defined COMPILE_PCRE32
+#define MOV_UCHAR  SLJIT_MOV_UI
+#define MOVU_UCHAR SLJIT_MOVU_UI
+#else
+#error Unsupported compiling mode
+#endif
+
+/* Shortcuts. */
+#define DEFINE_COMPILER \
+  struct sljit_compiler *compiler = common->compiler
+#define OP1(op, dst, dstw, src, srcw) \
+  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
+#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
+  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
+#define LABEL() \
+  sljit_emit_label(compiler)
+#define JUMP(type) \
+  sljit_emit_jump(compiler, (type))
+#define JUMPTO(type, label) \
+  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
+#define JUMPHERE(jump) \
+  sljit_set_label((jump), sljit_emit_label(compiler))
+#define SET_LABEL(jump, label) \
+  sljit_set_label((jump), (label))
+#define CMP(type, src1, src1w, src2, src2w) \
+  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
+#define CMPTO(type, src1, src1w, src2, src2w, label) \
+  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
+#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
+  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
+#define GET_LOCAL_BASE(dst, dstw, offset) \
+  sljit_get_local_base(compiler, (dst), (dstw), (offset))
+
+#define READ_CHAR_MAX 0x7fffffff
+
+static pcre_uchar* bracketend(pcre_uchar* cc) /* Returns the address just past the closing KET of the assert/bracket group that starts at cc. */
+{
+SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
+do cc += GET(cc, 1); while (*cc == OP_ALT); /* Hop over every alternative until the opcode is no longer OP_ALT. */
+SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
+cc += 1 + LINK_SIZE; /* Skip the KET opcode plus LINK_SIZE units (presumably its link field - confirm). */
+return cc;
+}
+
+static int no_alternatives(pcre_uchar* cc) /* Returns the number of alternatives in the assert/bracket group that starts at cc (at least 1). */
+{
+int count = 0;
+SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
+do
+  {
+  cc += GET(cc, 1); /* Advance to the following OP_ALT or to the closing KET. */
+  count++;
+  }
+while (*cc == OP_ALT);
+SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
+return count;
+}
+
+static int ones_in_half_byte[16] = { /* ones_in_half_byte[v] is the number of 1 bits in the 4-bit value v. */
+  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
+  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
+};
+
+/* Functions which might need modification for all new supported opcodes:
+ next_opcode
+ check_opcode_types
+ set_private_data_ptrs
+ get_framesize
+ init_frame
+ get_private_data_copy_length
+ copy_private_data
+ compile_matchingpath
+ compile_backtrackingpath
+*/
+/* Returns a pointer to the position that follows the opcode at cc, or NULL
+   when the opcode cannot be stepped over: OP_ANYBYTE while common->utf is
+   set (SUPPORT_UTF builds only), or any opcode that has no case label below
+   (which also triggers SLJIT_ASSERT_STOP). For bracket opcodes the result
+   is the position after the bracket header, not after the whole bracket. */
+static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
+{
+SLJIT_UNUSED_ARG(common);
+switch(*cc)
+  {
+  case OP_SOD:
+  case OP_SOM:
+  case OP_SET_SOM:
+  case OP_NOT_WORD_BOUNDARY:
+  case OP_WORD_BOUNDARY:
+  case OP_NOT_DIGIT:
+  case OP_DIGIT:
+  case OP_NOT_WHITESPACE:
+  case OP_WHITESPACE:
+  case OP_NOT_WORDCHAR:
+  case OP_WORDCHAR:
+  case OP_ANY:
+  case OP_ALLANY:
+  case OP_NOTPROP:
+  case OP_PROP:
+  case OP_ANYNL:
+  case OP_NOT_HSPACE:
+  case OP_HSPACE:
+  case OP_NOT_VSPACE:
+  case OP_VSPACE:
+  case OP_EXTUNI:
+  case OP_EODN:
+  case OP_EOD:
+  case OP_CIRC:
+  case OP_CIRCM:
+  case OP_DOLL:
+  case OP_DOLLM:
+  case OP_CRSTAR:
+  case OP_CRMINSTAR:
+  case OP_CRPLUS:
+  case OP_CRMINPLUS:
+  case OP_CRQUERY:
+  case OP_CRMINQUERY:
+  case OP_CRRANGE:
+  case OP_CRMINRANGE:
+  case OP_CRPOSSTAR:
+  case OP_CRPOSPLUS:
+  case OP_CRPOSQUERY:
+  case OP_CRPOSRANGE:
+  case OP_CLASS:
+  case OP_NCLASS:
+  case OP_REF:
+  case OP_REFI:
+  case OP_DNREF:
+  case OP_DNREFI:
+  case OP_RECURSE:
+  case OP_CALLOUT:
+  case OP_ALT:
+  case OP_KET:
+  case OP_KETRMAX:
+  case OP_KETRMIN:
+  case OP_KETRPOS:
+  case OP_REVERSE:
+  case OP_ASSERT:
+  case OP_ASSERT_NOT:
+  case OP_ASSERTBACK:
+  case OP_ASSERTBACK_NOT:
+  case OP_ONCE:
+  case OP_ONCE_NC:
+  case OP_BRA:
+  case OP_BRAPOS:
+  case OP_CBRA:
+  case OP_CBRAPOS:
+  case OP_COND:
+  case OP_SBRA:
+  case OP_SBRAPOS:
+  case OP_SCBRA:
+  case OP_SCBRAPOS:
+  case OP_SCOND:
+  case OP_CREF:
+  case OP_DNCREF:
+  case OP_RREF:
+  case OP_DNRREF:
+  case OP_DEF:
+  case OP_BRAZERO:
+  case OP_BRAMINZERO:
+  case OP_BRAPOSZERO:
+  case OP_PRUNE:
+  case OP_SKIP:
+  case OP_THEN:
+  case OP_COMMIT:
+  case OP_FAIL:
+  case OP_ACCEPT:
+  case OP_ASSERT_ACCEPT:
+  case OP_CLOSE:
+  case OP_SKIPZERO:
+  return cc + PRIV(OP_lengths)[*cc]; /* Length taken directly from the OP_lengths table. */
+
+  case OP_CHAR:
+  case OP_CHARI:
+  case OP_NOT:
+  case OP_NOTI:
+  case OP_STAR:
+  case OP_MINSTAR:
+  case OP_PLUS:
+  case OP_MINPLUS:
+  case OP_QUERY:
+  case OP_MINQUERY:
+  case OP_UPTO:
+  case OP_MINUPTO:
+  case OP_EXACT:
+  case OP_POSSTAR:
+  case OP_POSPLUS:
+  case OP_POSQUERY:
+  case OP_POSUPTO:
+  case OP_STARI:
+  case OP_MINSTARI:
+  case OP_PLUSI:
+  case OP_MINPLUSI:
+  case OP_QUERYI:
+  case OP_MINQUERYI:
+  case OP_UPTOI:
+  case OP_MINUPTOI:
+  case OP_EXACTI:
+  case OP_POSSTARI:
+  case OP_POSPLUSI:
+  case OP_POSQUERYI:
+  case OP_POSUPTOI:
+  case OP_NOTSTAR:
+  case OP_NOTMINSTAR:
+  case OP_NOTPLUS:
+  case OP_NOTMINPLUS:
+  case OP_NOTQUERY:
+  case OP_NOTMINQUERY:
+  case OP_NOTUPTO:
+  case OP_NOTMINUPTO:
+  case OP_NOTEXACT:
+  case OP_NOTPOSSTAR:
+  case OP_NOTPOSPLUS:
+  case OP_NOTPOSQUERY:
+  case OP_NOTPOSUPTO:
+  case OP_NOTSTARI:
+  case OP_NOTMINSTARI:
+  case OP_NOTPLUSI:
+  case OP_NOTMINPLUSI:
+  case OP_NOTQUERYI:
+  case OP_NOTMINQUERYI:
+  case OP_NOTUPTOI:
+  case OP_NOTMINUPTOI:
+  case OP_NOTEXACTI:
+  case OP_NOTPOSSTARI:
+  case OP_NOTPOSPLUSI:
+  case OP_NOTPOSQUERYI:
+  case OP_NOTPOSUPTOI:
+  cc += PRIV(OP_lengths)[*cc]; /* Table length first; in UTF mode the extra length reported for cc[-1] is added below. */
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+  return cc;
+
+  /* Special cases. */
+  case OP_TYPESTAR:
+  case OP_TYPEMINSTAR:
+  case OP_TYPEPLUS:
+  case OP_TYPEMINPLUS:
+  case OP_TYPEQUERY:
+  case OP_TYPEMINQUERY:
+  case OP_TYPEUPTO:
+  case OP_TYPEMINUPTO:
+  case OP_TYPEEXACT:
+  case OP_TYPEPOSSTAR:
+  case OP_TYPEPOSPLUS:
+  case OP_TYPEPOSQUERY:
+  case OP_TYPEPOSUPTO:
+  return cc + PRIV(OP_lengths)[*cc] - 1; /* One unit short of the table length; presumably so the trailing character-type opcode is visited on its own. */
+
+  case OP_ANYBYTE:
+#ifdef SUPPORT_UTF
+  if (common->utf) return NULL;
+#endif
+  return cc + 1;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+  case OP_XCLASS:
+  return cc + GET(cc, 1); /* The total length is stored in the opcode itself. */
+#endif
+
+  case OP_MARK:
+  case OP_PRUNE_ARG:
+  case OP_SKIP_ARG:
+  case OP_THEN_ARG:
+  return cc + 1 + 2 + cc[1]; /* Variable length: cc[1] is added to a fixed part of 3 units. */
+
+  default:
+  /* All opcodes are supported now! */
+  SLJIT_ASSERT_STOP();
+  return NULL;
+  }
+}
+/* Scans the compiled pattern from cc up to ccend and records pattern-wide
+   facts in common: feature flags, one-word slots reserved by advancing
+   ovector_start, capture brackets whose optimized_cbracket entry must be
+   cleared, and the read-only data size. Returns FALSE when the pattern
+   contains something unsupported (next_opcode returned NULL, or a COND/SCOND
+   is directly followed by OP_CALLOUT); returns TRUE otherwise. */
+static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
+{
+int count;
+pcre_uchar *slot;
+
+/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
+while (cc < ccend)
+  {
+  switch(*cc)
+    {
+    case OP_SET_SOM:
+    common->has_set_som = TRUE;
+    common->might_be_empty = TRUE;
+    cc += 1;
+    break;
+
+    case OP_REF:
+    case OP_REFI:
+    common->optimized_cbracket[GET2(cc, 1)] = 0; /* A referenced group loses its optimized_cbracket mark. */
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_BRA:
+    case OP_CBRA:
+    case OP_SBRA:
+    case OP_SCBRA:
+    count = no_alternatives(cc);
+    if (count > 4) /* Brackets with more than 4 alternatives reserve one sljit_uw of read-only data per alternative. */
+      common->read_only_data_size += count * sizeof(sljit_uw);
+    cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
+    break;
+
+    case OP_CBRAPOS:
+    case OP_SCBRAPOS:
+    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_COND:
+    case OP_SCOND:
+    /* Only AUTO_CALLOUT can insert this opcode. We do
+       not intend to support this case. */
+    if (cc[1 + LINK_SIZE] == OP_CALLOUT)
+      return FALSE;
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CREF:
+    common->optimized_cbracket[GET2(cc, 1)] = 0;
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_DNREF:
+    case OP_DNREFI:
+    case OP_DNCREF:
+    count = GET2(cc, 1 + IMM2_SIZE); /* Number of name table entries to visit. */
+    slot = common->name_table + GET2(cc, 1) * common->name_entry_size; /* First entry, located by its index in the name table. */
+    while (count-- > 0)
+      {
+      common->optimized_cbracket[GET2(slot, 0)] = 0;
+      slot += common->name_entry_size;
+      }
+    cc += 1 + 2 * IMM2_SIZE;
+    break;
+
+    case OP_RECURSE:
+    /* Set its value only once. */
+    if (common->recursive_head_ptr == 0)
+      {
+      common->recursive_head_ptr = common->ovector_start;
+      common->ovector_start += sizeof(sljit_sw);
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CALLOUT:
+    if (common->capture_last_ptr == 0) /* Reserve the slot only on the first callout. */
+      {
+      common->capture_last_ptr = common->ovector_start;
+      common->ovector_start += sizeof(sljit_sw);
+      }
+    cc += 2 + 2 * LINK_SIZE;
+    break;
+
+    case OP_THEN_ARG:
+    common->has_then = TRUE;
+    common->control_head_ptr = 1; /* NOTE(review): set to the non-zero value 1 here; presumably replaced by a real offset later - confirm. */
+    /* Fall through. */
+
+    case OP_PRUNE_ARG:
+    common->needs_start_ptr = TRUE;
+    /* Fall through. */
+
+    case OP_MARK:
+    if (common->mark_ptr == 0) /* Reserve the slot only on the first mark-carrying opcode. */
+      {
+      common->mark_ptr = common->ovector_start;
+      common->ovector_start += sizeof(sljit_sw);
+      }
+    cc += 1 + 2 + cc[1];
+    break;
+
+    case OP_THEN:
+    common->has_then = TRUE;
+    common->control_head_ptr = 1;
+    /* Fall through. */
+
+    case OP_PRUNE:
+    case OP_SKIP:
+    common->needs_start_ptr = TRUE;
+    cc += 1;
+    break;
+
+    case OP_SKIP_ARG:
+    common->control_head_ptr = 1;
+    common->has_skip_arg = TRUE;
+    cc += 1 + 2 + cc[1];
+    break;
+
+    default:
+    cc = next_opcode(common, cc);
+    if (cc == NULL)
+      return FALSE;
+    break;
+    }
+  }
+return TRUE;
+}
+
+static int get_class_iterator_size(pcre_uchar *cc)
+{
+/* Tells how many private data words the class repeat opcode at cc needs:
+   2 for greedy star/plus, 1 for lazy star/plus and both query forms,
+   2 for a range whose two bounds differ, and 0 for everything else
+   (including a range whose bounds are equal). */
+pcre_uchar repeat_op = *cc;
+
+if (repeat_op == OP_CRSTAR || repeat_op == OP_CRPLUS)
+  return 2;
+
+if (repeat_op == OP_CRMINSTAR || repeat_op == OP_CRMINPLUS
+    || repeat_op == OP_CRQUERY || repeat_op == OP_CRMINQUERY)
+  return 1;
+
+if (repeat_op == OP_CRRANGE || repeat_op == OP_CRMINRANGE)
+  return (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) ? 0 : 2;
+
+return 0;
+}
+/* Checks whether the bracket at begin is immediately followed by identical
+   copies of itself (compared with memcmp), optionally followed by a nested
+   chain of BRAZERO/BRAMINZERO wrapped copies. When such a repeat is found
+   it is recorded in three consecutive common->private_data_ptrs entries
+   starting one position after the relevant OP_KET: a length, a repeat type
+   (OP_EXACT, OP_UPTO or OP_MINUPTO) and a count. Returns TRUE when a repeat
+   was recorded now or had already been recorded, FALSE otherwise. */
+static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
+{
+pcre_uchar *end = bracketend(begin);
+pcre_uchar *next;
+pcre_uchar *next_end;
+pcre_uchar *max_end;
+pcre_uchar type;
+sljit_sw length = end - begin;
+int min, max, i;
+
+/* Detect fixed iterations first. */
+if (end[-(1 + LINK_SIZE)] != OP_KET)
+  return FALSE;
+
+/* Already detected repeat. */
+if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
+  return TRUE;
+
+next = end;
+min = 1; /* Counts the original bracket plus every identical copy that follows. */
+while (1)
+  {
+  if (*next != *begin)
+    break;
+  next_end = bracketend(next);
+  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
+    break;
+  next = next_end;
+  min++;
+  }
+
+if (min == 2) /* Exactly two copies are not converted. */
+  return FALSE;
+
+max = 0; /* Counts the BRAZERO/BRAMINZERO + OP_BRA wrapped copies found below. */
+max_end = next;
+if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
+  {
+  type = *next;
+  while (1)
+    {
+    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
+      break;
+    next_end = bracketend(next + 2 + LINK_SIZE);
+    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
+      break;
+    next = next_end;
+    max++;
+    }
+
+  if (next[0] == type && next[1] == *begin && max >= 1) /* The innermost copy follows the zero opcode directly, without an OP_BRA wrapper. */
+    {
+    next_end = bracketend(next + 1);
+    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
+      {
+      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) /* Each OP_BRA wrapper opened above must be closed by a plain OP_KET. */
+        if (*next_end != OP_KET)
+          break;
+
+      if (i == max)
+        {
+        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
+        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
+        /* +2 the original and the last. */
+        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
+        if (min == 1)
+          return TRUE;
+        min--;
+        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE); /* Move max_end back by the preceding OP_KET and the link stored in it. */
+        }
+      }
+    }
+  }
+
+if (min >= 3)
+  {
+  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
+  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
+  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
+  return TRUE;
+  }
+
+return FALSE;
+}
+/* Case-label groups shared by the switch statements of the functions that
+   follow. The numeric suffix is the largest number of private data words
+   set_private_data_ptrs reserves for the group (1 or 2). The "2B" groups
+   are the ones whose opcodes are stepped over with an additional IMM2_SIZE;
+   the "TYPE" groups are the character-type repeat opcodes. */
+#define CASE_ITERATOR_PRIVATE_DATA_1 \
+    case OP_MINSTAR: \
+    case OP_MINPLUS: \
+    case OP_QUERY: \
+    case OP_MINQUERY: \
+    case OP_MINSTARI: \
+    case OP_MINPLUSI: \
+    case OP_QUERYI: \
+    case OP_MINQUERYI: \
+    case OP_NOTMINSTAR: \
+    case OP_NOTMINPLUS: \
+    case OP_NOTQUERY: \
+    case OP_NOTMINQUERY: \
+    case OP_NOTMINSTARI: \
+    case OP_NOTMINPLUSI: \
+    case OP_NOTQUERYI: \
+    case OP_NOTMINQUERYI:
+
+#define CASE_ITERATOR_PRIVATE_DATA_2A \
+    case OP_STAR: \
+    case OP_PLUS: \
+    case OP_STARI: \
+    case OP_PLUSI: \
+    case OP_NOTSTAR: \
+    case OP_NOTPLUS: \
+    case OP_NOTSTARI: \
+    case OP_NOTPLUSI:
+
+#define CASE_ITERATOR_PRIVATE_DATA_2B \
+    case OP_UPTO: \
+    case OP_MINUPTO: \
+    case OP_UPTOI: \
+    case OP_MINUPTOI: \
+    case OP_NOTUPTO: \
+    case OP_NOTMINUPTO: \
+    case OP_NOTUPTOI: \
+    case OP_NOTMINUPTOI:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
+    case OP_TYPEMINSTAR: \
+    case OP_TYPEMINPLUS: \
+    case OP_TYPEQUERY: \
+    case OP_TYPEMINQUERY:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
+    case OP_TYPESTAR: \
+    case OP_TYPEPLUS:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
+    case OP_TYPEUPTO: \
+    case OP_TYPEMINUPTO:
+/* Walks the pattern from common->start up to ccend and assigns private
+   data offsets, stored in common->private_data_ptrs indexed by the opcode's
+   offset from common->start. Allocation begins at *private_data_start,
+   which is updated to the first unused offset on normal return. Returns
+   early, without updating *private_data_start, as soon as the running
+   offset exceeds SLJIT_MAX_LOCAL_SIZE. */
+static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
+{
+pcre_uchar *cc = common->start;
+pcre_uchar *alternative;
+pcre_uchar *end = NULL;
+int private_data_ptr = *private_data_start;
+int space, size, bracketlen;
+
+while (cc < ccend)
+  {
+  space = 0; /* Number of sljit_sw words to reserve for an iterator opcode. */
+  size = 0; /* Distance to advance cc; a negative value additionally requests the UTF extra length check. */
+  bracketlen = 0; /* Header length of a bracket opcode, skipped at the bottom of the loop. */
+  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
+    return;
+
+  if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
+    if (detect_repeat(common, cc))
+      {
+      /* These brackets are converted to repeats, so no global
+      based single character repeat is allowed. */
+      if (cc >= end)
+        end = bracketend(cc);
+      }
+
+  switch(*cc)
+    {
+    case OP_KET:
+    if (common->private_data_ptrs[cc + 1 - common->start] != 0) /* Non-zero means detect_repeat recorded a repeat after this OP_KET. */
+      {
+      common->private_data_ptrs[cc - common->start] = private_data_ptr;
+      private_data_ptr += sizeof(sljit_sw);
+      cc += common->private_data_ptrs[cc + 1 - common->start]; /* Skip the length recorded by detect_repeat. */
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRAPOS:
+    case OP_SBRA:
+    case OP_SBRAPOS:
+    case OP_SCOND:
+    common->private_data_ptrs[cc - common->start] = private_data_ptr;
+    private_data_ptr += sizeof(sljit_sw);
+    bracketlen = 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRAPOS:
+    case OP_SCBRAPOS:
+    common->private_data_ptrs[cc - common->start] = private_data_ptr;
+    private_data_ptr += sizeof(sljit_sw);
+    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_COND:
+    /* Might be a hidden SCOND. */
+    alternative = cc + GET(cc, 1);
+    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+      {
+      common->private_data_ptrs[cc - common->start] = private_data_ptr;
+      private_data_ptr += sizeof(sljit_sw);
+      }
+    bracketlen = 1 + LINK_SIZE;
+    break;
+
+    case OP_BRA:
+    bracketlen = 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_SCBRA:
+    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_1
+    space = 1;
+    size = -2;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2A
+    space = 2;
+    size = -2;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2B
+    space = 2;
+    size = -(2 + IMM2_SIZE);
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+    space = 1;
+    size = 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) /* No private slot when the repeated type is OP_ANYNL or OP_EXTUNI. */
+      space = 2;
+    size = 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
+      space = 2;
+    size = 1 + IMM2_SIZE;
+    break;
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    size += 1 + 32 / sizeof(pcre_uchar);
+    space = get_class_iterator_size(cc + size); /* The repeat opcode, if any, follows the class data. */
+    break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS:
+    size = GET(cc, 1);
+    space = get_class_iterator_size(cc + size);
+    break;
+#endif
+
+    default:
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+
+  /* Character iterators, which are not inside a repeated bracket,
+     gets a private slot instead of allocating it on the stack. */
+  if (space > 0 && cc >= end)
+    {
+    common->private_data_ptrs[cc - common->start] = private_data_ptr;
+    private_data_ptr += sizeof(sljit_sw) * space;
+    }
+
+  if (size != 0)
+    {
+    if (size < 0)
+      {
+      cc += -size;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      }
+    else
+      cc += size;
+    }
+
+  if (bracketlen > 0)
+    {
+    if (cc >= end)
+      {
+      end = bracketend(cc);
+      if (end[-1 - LINK_SIZE] == OP_KET) /* A bracket closed by a plain OP_KET does not restrict the iterators inside it. */
+        end = NULL;
+      }
+    cc += bracketlen;
+    }
+  }
+*private_data_start = private_data_ptr;
+}
+
+/* Computes the number of stack words needed for a frame covering the
+   opcodes from cc up to ccend. When ccend is NULL the range is the body of
+   the bracket at cc. Every capturing bracket adds 3 words; the
+   start-of-match, mark and last-capture items add 2 words each and are
+   counted at most once (start-of-match and mark are treated as already
+   counted when recursive is TRUE). Returns length + 1 when a frame is
+   needed. Otherwise returns a frame_types value (always < 0): no_frame when
+   stack_restore was set by some opcode, no_stack when it was not.
+   *needs_control_head is set to TRUE when a MARK, PRUNE_ARG or THEN_ARG is
+   met while common->control_head_ptr is non-zero (or unconditionally when
+   DEBUG_FORCE_CONTROL_HEAD is enabled), and to FALSE otherwise. */
+static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
+{
+int length = 0;
+int possessive = 0;
+BOOL stack_restore = FALSE;
+BOOL setsom_found = recursive;
+BOOL setmark_found = recursive;
+/* The last capture is a local variable even for recursions. */
+BOOL capture_last_found = FALSE;
+
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+SLJIT_ASSERT(common->control_head_ptr != 0);
+*needs_control_head = TRUE;
+#else
+*needs_control_head = FALSE;
+#endif
+
+if (ccend == NULL)
+  {
+  ccend = bracketend(cc) - (1 + LINK_SIZE); /* Stop before the closing KET of the bracket. */
+  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
+    {
+    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
+    /* This is correct regardless of common->capture_last_ptr. */
+    capture_last_found = TRUE;
+    }
+  cc = next_opcode(common, cc);
+  }
+
+SLJIT_ASSERT(cc != NULL);
+while (cc < ccend)
+  switch(*cc)
+    {
+    case OP_SET_SOM:
+    SLJIT_ASSERT(common->has_set_som);
+    stack_restore = TRUE;
+    if (!setsom_found)
+      {
+      length += 2;
+      setsom_found = TRUE;
+      }
+    cc += 1;
+    break;
+
+    case OP_MARK:
+    case OP_PRUNE_ARG:
+    case OP_THEN_ARG:
+    SLJIT_ASSERT(common->mark_ptr != 0);
+    stack_restore = TRUE;
+    if (!setmark_found)
+      {
+      length += 2;
+      setmark_found = TRUE;
+      }
+    if (common->control_head_ptr != 0)
+      *needs_control_head = TRUE;
+    cc += 1 + 2 + cc[1];
+    break;
+
+    case OP_RECURSE:
+    stack_restore = TRUE;
+    if (common->has_set_som && !setsom_found)
+      {
+      length += 2;
+      setsom_found = TRUE;
+      }
+    if (common->mark_ptr != 0 && !setmark_found)
+      {
+      length += 2;
+      setmark_found = TRUE;
+      }
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      length += 2;
+      capture_last_found = TRUE;
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRA:
+    case OP_SCBRAPOS:
+    stack_restore = TRUE;
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      length += 2;
+      capture_last_found = TRUE;
+      }
+    length += 3;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    default:
+    stack_restore = TRUE; /* Every opcode without a case label in the list below sets stack_restore. */
+    /* Fall through. */
+
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    case OP_NOT_DIGIT:
+    case OP_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+    case OP_ANYBYTE:
+    case OP_NOTPROP:
+    case OP_PROP:
+    case OP_ANYNL:
+    case OP_NOT_HSPACE:
+    case OP_HSPACE:
+    case OP_NOT_VSPACE:
+    case OP_VSPACE:
+    case OP_EXTUNI:
+    case OP_EODN:
+    case OP_EOD:
+    case OP_CIRC:
+    case OP_CIRCM:
+    case OP_DOLL:
+    case OP_DOLLM:
+    case OP_CHAR:
+    case OP_CHARI:
+    case OP_NOT:
+    case OP_NOTI:
+
+    case OP_EXACT:
+    case OP_POSSTAR:
+    case OP_POSPLUS:
+    case OP_POSQUERY:
+    case OP_POSUPTO:
+
+    case OP_EXACTI:
+    case OP_POSSTARI:
+    case OP_POSPLUSI:
+    case OP_POSQUERYI:
+    case OP_POSUPTOI:
+
+    case OP_NOTEXACT:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSUPTO:
+
+    case OP_NOTEXACTI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTPOSQUERYI:
+    case OP_NOTPOSUPTOI:
+
+    case OP_TYPEEXACT:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEPOSQUERY:
+    case OP_TYPEPOSUPTO:
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    case OP_XCLASS:
+
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+
+/* Possessive quantifiers can use a special case. */
+if (SLJIT_UNLIKELY(possessive == length))
+  return stack_restore ? no_frame : no_stack;
+
+if (length > 0)
+  return length + 1; /* One extra word; init_frame ends the frame with a zero word. */
+return stack_restore ? no_frame : no_stack;
+}
+/* Emits the moves that fill a frame on the stack for the opcodes from cc
+   up to ccend (the body of the bracket at cc when ccend is NULL), writing
+   from stackpos onwards. Each saved item is an identifying word followed by
+   the current value(s): the negated local offset plus one value for the
+   start-of-match, mark and last-capture items, and OVECTOR(offset) plus two
+   values for a capturing bracket. A zero word ends the frame. The walk
+   mirrors the counting done in get_framesize; the final assert checks that
+   exactly the expected number of words was written. */
+static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
+{
+DEFINE_COMPILER;
+BOOL setsom_found = recursive;
+BOOL setmark_found = recursive;
+/* The last capture is a local variable even for recursions. */
+BOOL capture_last_found = FALSE;
+int offset;
+
+/* >= 1 + shortest item size (2) */
+SLJIT_UNUSED_ARG(stacktop);
+SLJIT_ASSERT(stackpos >= stacktop + 2);
+
+stackpos = STACK(stackpos);
+if (ccend == NULL)
+  {
+  ccend = bracketend(cc) - (1 + LINK_SIZE);
+  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) /* A non-recursive CBRAPOS/SCBRAPOS is not skipped, so the loop below saves it too. */
+    cc = next_opcode(common, cc);
+  }
+
+SLJIT_ASSERT(cc != NULL);
+while (cc < ccend)
+  switch(*cc)
+    {
+    case OP_SET_SOM:
+    SLJIT_ASSERT(common->has_set_som);
+    if (!setsom_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setsom_found = TRUE;
+      }
+    cc += 1;
+    break;
+
+    case OP_MARK:
+    case OP_PRUNE_ARG:
+    case OP_THEN_ARG:
+    SLJIT_ASSERT(common->mark_ptr != 0);
+    if (!setmark_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setmark_found = TRUE;
+      }
+    cc += 1 + 2 + cc[1];
+    break;
+
+    case OP_RECURSE:
+    if (common->has_set_som && !setsom_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setsom_found = TRUE;
+      }
+    if (common->mark_ptr != 0 && !setmark_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setmark_found = TRUE;
+      }
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      capture_last_found = TRUE;
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRA:
+    case OP_SCBRAPOS:
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      capture_last_found = TRUE;
+      }
+    offset = (GET2(cc, 1 + LINK_SIZE)) << 1; /* Twice the capture number: the index of the group's first ovector entry. */
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
+    stackpos += (int)sizeof(sljit_sw);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+    stackpos += (int)sizeof(sljit_sw);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
+    stackpos += (int)sizeof(sljit_sw);
+
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    default:
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0); /* Zero word that ends the frame. */
+SLJIT_ASSERT(stackpos == STACK(stacktop));
+}
+/* Returns the number of machine words counted for the opcodes from cc up
+   to ccend: the count starts at 2 (3 when needs_control_head is set) and
+   grows by one word per private data slot in use, by one word per capture
+   bracket whose optimized_cbracket entry is 0, and by two words per
+   CBRAPOS/SCBRAPOS. The walk must finish exactly at ccend (asserted). The
+   case structure parallels copy_private_data. */
+static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
+{
+int private_data_length = needs_control_head ? 3 : 2;
+int size;
+pcre_uchar *alternative;
+/* Calculate the sum of the private machine words. */
+while (cc < ccend)
+  {
+  size = 0;
+  switch(*cc)
+    {
+    case OP_KET:
+    if (PRIVATE_DATA(cc) != 0)
+      private_data_length++;
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRAPOS:
+    case OP_SBRA:
+    case OP_SBRAPOS:
+    case OP_SCOND:
+    private_data_length++;
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_SCBRA:
+    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+      private_data_length++;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_CBRAPOS:
+    case OP_SCBRAPOS:
+    private_data_length += 2;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_COND:
+    /* Might be a hidden SCOND. */
+    alternative = cc + GET(cc, 1);
+    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+      private_data_length++;
+    cc += 1 + LINK_SIZE;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_1
+    if (PRIVATE_DATA(cc))
+      private_data_length++;
+    cc += 2;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2A
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 2;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2B
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+    if (PRIVATE_DATA(cc))
+      private_data_length++;
+    cc += 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_CLASS:
+    case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS:
+    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
+#else
+    size = 1 + 32 / (int)sizeof(pcre_uchar);
+#endif
+    if (PRIVATE_DATA(cc))
+      private_data_length += get_class_iterator_size(cc + size); /* The word count depends on the repeat opcode that follows the class. */
+    cc += size;
+    break;
+
+    default:
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+  }
+SLJIT_ASSERT(cc == ccend);
+return private_data_length;
+}
+/* Emits moves that copy the private data words of the opcodes from cc up
+   to ccend between the local area (addressed through SLJIT_LOCALS_REG) and
+   the stack (addressed through STACK_TOP). When save is TRUE the values are
+   loaded from the locals and stored to the stack, starting with
+   recursive_head_ptr (and control_head_ptr when needs_control_head is set).
+   When save is FALSE the values are loaded from the stack and stored back
+   to the locals. TMP1 and TMP2 are used alternately as staging registers;
+   tmp1next tells which one is used next and tmp1empty/tmp2empty tell
+   whether each currently holds a pending value. The final assert checks
+   that the walk ended at ccend and that the stack range [stackptr,
+   stacktop) was used up exactly. */
+static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
+  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
+{
+DEFINE_COMPILER;
+int srcw[2];
+int count, size;
+BOOL tmp1next = TRUE;
+BOOL tmp1empty = TRUE;
+BOOL tmp2empty = TRUE;
+pcre_uchar *alternative;
+enum {
+  start,
+  loop,
+  end
+} status;
+
+status = save ? start : loop; /* Only the save direction goes through the 'start' state. */
+stackptr = STACK(stackptr - 2);
+stacktop = STACK(stacktop - 1);
+
+if (!save)
+  {
+  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw); /* NOTE(review): presumably skips the word(s) stored for the 'start' state by the save direction - confirm. */
+  if (stackptr < stacktop)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+    stackptr += sizeof(sljit_sw);
+    tmp1empty = FALSE;
+    }
+  if (stackptr < stacktop)
+    {
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+    stackptr += sizeof(sljit_sw);
+    tmp2empty = FALSE;
+    }
+  /* The tmp1next must be TRUE in either way. */
+  }
+
+do
+  {
+  count = 0; /* Number of entries of srcw[] filled in by this iteration. */
+  switch(status)
+    {
+    case start:
+    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
+    count = 1;
+    srcw[0] = common->recursive_head_ptr;
+    if (needs_control_head)
+      {
+      SLJIT_ASSERT(common->control_head_ptr != 0);
+      count = 2;
+      srcw[1] = common->control_head_ptr;
+      }
+    status = loop;
+    break;
+
+    case loop:
+    if (cc >= ccend)
+      {
+      status = end;
+      break;
+      }
+
+    switch(*cc)
+      {
+      case OP_KET:
+      if (PRIVATE_DATA(cc) != 0)
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        }
+      cc += 1 + LINK_SIZE;
+      break;
+
+      case OP_ASSERT:
+      case OP_ASSERT_NOT:
+      case OP_ASSERTBACK:
+      case OP_ASSERTBACK_NOT:
+      case OP_ONCE:
+      case OP_ONCE_NC:
+      case OP_BRAPOS:
+      case OP_SBRA:
+      case OP_SBRAPOS:
+      case OP_SCOND:
+      count = 1;
+      srcw[0] = PRIVATE_DATA(cc);
+      SLJIT_ASSERT(srcw[0] != 0);
+      cc += 1 + LINK_SIZE;
+      break;
+
+      case OP_CBRA:
+      case OP_SCBRA:
+      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+        {
+        count = 1;
+        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+        }
+      cc += 1 + LINK_SIZE + IMM2_SIZE;
+      break;
+
+      case OP_CBRAPOS:
+      case OP_SCBRAPOS:
+      count = 2;
+      srcw[0] = PRIVATE_DATA(cc);
+      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
+      cc += 1 + LINK_SIZE + IMM2_SIZE;
+      break;
+
+      case OP_COND:
+      /* Might be a hidden SCOND. */
+      alternative = cc + GET(cc, 1);
+      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        SLJIT_ASSERT(srcw[0] != 0);
+        }
+      cc += 1 + LINK_SIZE;
+      break;
+
+      CASE_ITERATOR_PRIVATE_DATA_1
+      if (PRIVATE_DATA(cc))
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        }
+      cc += 2;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      break;
+
+      CASE_ITERATOR_PRIVATE_DATA_2A
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+        }
+      cc += 2;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      break;
+
+      CASE_ITERATOR_PRIVATE_DATA_2B
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+        }
+      cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      break;
+
+      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+      if (PRIVATE_DATA(cc))
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        }
+      cc += 1;
+      break;
+
+      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = srcw[0] + sizeof(sljit_sw);
+        }
+      cc += 1;
+      break;
+
+      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = srcw[0] + sizeof(sljit_sw);
+        }
+      cc += 1 + IMM2_SIZE;
+      break;
+
+      case OP_CLASS:
+      case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+      case OP_XCLASS:
+      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
+#else
+      size = 1 + 32 / (int)sizeof(pcre_uchar);
+#endif
+      if (PRIVATE_DATA(cc))
+        switch(get_class_iterator_size(cc + size))
+          {
+          case 1:
+          count = 1;
+          srcw[0] = PRIVATE_DATA(cc);
+          break;
+
+          case 2:
+          count = 2;
+          srcw[0] = PRIVATE_DATA(cc);
+          srcw[1] = srcw[0] + sizeof(sljit_sw);
+          break;
+
+          default:
+          SLJIT_ASSERT_STOP();
+          break;
+          }
+      cc += size;
+      break;
+
+      default:
+      cc = next_opcode(common, cc);
+      SLJIT_ASSERT(cc != NULL);
+      break;
+      }
+    break;
+
+    case end:
+    SLJIT_ASSERT_STOP();
+    break;
+    }
+
+  while (count > 0)
+    {
+    count--; /* srcw[] entries are consumed from the last one to the first. */
+    if (save)
+      {
+      if (tmp1next)
+        {
+        if (!tmp1empty) /* Store the value still pending in TMP1 before reusing the register. */
+          {
+          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
+          stackptr += sizeof(sljit_sw);
+          }
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
+        tmp1empty = FALSE;
+        tmp1next = FALSE;
+        }
+      else
+        {
+        if (!tmp2empty)
+          {
+          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
+          stackptr += sizeof(sljit_sw);
+          }
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
+        tmp2empty = FALSE;
+        tmp1next = TRUE;
+        }
+      }
+    else
+      {
+      if (tmp1next)
+        {
+        SLJIT_ASSERT(!tmp1empty);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
+        tmp1empty = stackptr >= stacktop; /* Refill TMP1 only while stack words remain. */
+        if (!tmp1empty)
+          {
+          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+          stackptr += sizeof(sljit_sw);
+          }
+        tmp1next = FALSE;
+        }
+      else
+        {
+        SLJIT_ASSERT(!tmp2empty);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
+        tmp2empty = stackptr >= stacktop;
+        if (!tmp2empty)
+          {
+          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+          stackptr += sizeof(sljit_sw);
+          }
+        tmp1next = TRUE;
+        }
+      }
+    }
+  }
+while (status != end);
+
+if (save) /* Flush the values still pending in the staging registers, oldest first. */
+  {
+  if (tmp1next)
+    {
+    if (!tmp1empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    if (!tmp2empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    }
+  else
+    {
+    if (!tmp2empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    if (!tmp1empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    }
+  }
+SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
+}
+
+/* Walks one bracket starting at cc and stores 1 into common->then_offsets at
+the position belonging to the alternative that contains a THEN verb
+(OP_THEN .. OP_THEN_ARG). Nested assertions and brackets in the
+OP_ONCE .. OP_SCOND range are handled recursively. current_offset is the
+slot inherited from the enclosing bracket (may be NULL). Returns the value
+of bracketend() for the bracket. */
+static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
+{
+pcre_uchar *bracket_end = bracketend(cc);
+BOOL track_alternatives;
+
+/* Conditional blocks are never treated as having alternatives here. */
+if (*cc == OP_COND || *cc == OP_SCOND)
+  track_alternatives = FALSE;
+else
+  track_alternatives = (cc[GET(cc, 1)] == OP_ALT);
+
+/* Assertions capture THEN, so the inherited slot is dropped. */
+if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
+  current_offset = NULL;
+
+cc = next_opcode(common, cc);
+if (track_alternatives)
+  current_offset = common->then_offsets + (cc - common->start);
+
+while (cc < bracket_end)
+  {
+  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
+    {
+    /* Nested bracket: the recursive call returns its end position. */
+    cc = set_then_offsets(common, cc, current_offset);
+    continue;
+    }
+
+  /* A new alternative of this bracket starts right after the OP_ALT link. */
+  if (track_alternatives && *cc == OP_ALT)
+    current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
+  if (current_offset != NULL && *cc >= OP_THEN && *cc <= OP_THEN_ARG)
+    *current_offset = 1;
+  cc = next_opcode(common, cc);
+  }
+
+return bracket_end;
+}
+
+#undef CASE_ITERATOR_PRIVATE_DATA_1
+#undef CASE_ITERATOR_PRIVATE_DATA_2A
+#undef CASE_ITERATOR_PRIVATE_DATA_2B
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+
+/* Returns TRUE when value has at most one bit set. Note that this also
+reports TRUE for value == 0. */
+static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
+{
+/* Clearing the lowest set bit leaves zero only if no other bit was set. */
+unsigned int without_lowest_bit = value & (value - 1);
+
+return without_lowest_bit == 0;
+}
+
+/* Binds every jump stored in the list to the given label. */
+static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
+{
+jump_list *item;
+
+/* sljit_set_label is clever enough to do nothing
+if either the jump or the label is NULL. */
+for (item = list; item != NULL; item = item->next)
+  SET_LABEL(item->jump, label);
+}
+
+/* Prepends jump to the jump list pointed to by list. The list node is
+obtained from sljit_alloc_memory; if that returns NULL the list is left
+unchanged. */
+static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
+{
+jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));
+
+if (node == NULL)
+  return;
+
+node->jump = jump;
+node->next = *list;
+*list = node;
+}
+
+/* Records a stub for later emission by flush_stubs(). start is the jump that
+enters the stub; a label created at the current code position is stored as
+the point where the stub returns (quit). If the list item cannot be
+allocated nothing is recorded. */
+static void add_stub(compiler_common *common, struct sljit_jump *start)
+{
+DEFINE_COMPILER;
+stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
+
+if (list_item)
+  {
+  list_item->start = start;
+  /* The label marks the instruction following the stub's entry jump. */
+  list_item->quit = LABEL();
+  list_item->next = common->stubs;
+  common->stubs = list_item;
+  }
+}
+
+/* Emits the body of every stub recorded by add_stub() and empties the list.
+Each stub binds its entry jump to the current position, performs a fast call
+that is added to the common->stackalloc jump list, and then jumps back to
+the stub's quit label. */
+static void flush_stubs(compiler_common *common)
+{
+DEFINE_COMPILER;
+stub_list* list_item = common->stubs;
+
+while (list_item)
+  {
+  JUMPHERE(list_item->start);
+  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
+  JUMPTO(SLJIT_JUMP, list_item->quit);
+  list_item = list_item->next;
+  }
+common->stubs = NULL;
+}
+
+/* Creates a label at the current code position and records it together with
+the current common->read_only_data_ptr slot at the head of
+common->label_addrs; the slot pointer is then advanced by one. Does nothing
+when the list node cannot be allocated. */
+static void add_label_addr(compiler_common *common)
+{
+DEFINE_COMPILER;
+label_addr_list *node = sljit_alloc_memory(compiler, sizeof(label_addr_list));
+
+if (node != NULL)
+  {
+  node->label = LABEL();
+  node->addr = common->read_only_data_ptr;
+  node->next = common->label_addrs;
+  common->label_addrs = node;
+  common->read_only_data_ptr++;
+  }
+}
+
+/* Emits code that decrements COUNT_MATCH by one and, when the result is
+zero, takes a jump that is added to the common->calllimit jump list. */
+static SLJIT_INLINE void count_match(compiler_common *common)
+{
+DEFINE_COMPILER;
+
+OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
+add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
+}
+
+/* Emits code that advances STACK_TOP by size machine words (sljit_sw) and
+registers a stub that is entered when STACK_TOP becomes greater than
+STACK_LIMIT. */
+static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
+{
+/* May destroy all locals and registers except TMP2. */
+DEFINE_COMPILER;
+
+OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+#ifdef DESTROY_REGISTERS
+/* Debug aid: overwrite TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with
+the constant 12345. */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
+OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
+#endif
+add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
+}
+
+/* Emits code that moves STACK_TOP back by size machine words (sljit_sw);
+the counterpart of allocate_stack(). No limit check is emitted. */
+static SLJIT_INLINE void free_stack(compiler_common *common, int size)
+{
+DEFINE_COMPILER;
+OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+}
+
+/* Emits code that stores "subject begin - 1 character" into OVECTOR(1) ..
+OVECTOR(length - 1). Short vectors (length < 8) are written with unrolled
+stores, longer ones with a counted loop. length must be greater than 1. */
+static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+int i;
+
+/* At this point we can freely use all temporary registers. */
+SLJIT_ASSERT(length > 1);
+/* TMP1 returns with begin - 1. */
+OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
+if (length < 8)
+  {
+  for (i = 1; i < length; i++)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
+  }
+else
+  {
+  /* SCRATCH_REG2 walks the vector, SCRATCH_REG3 counts the remaining
+  length - 1 stores. */
+  GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
+  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
+  loop = LABEL();
+  OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_C_NOT_ZERO, loop);
+  }
+}
+
+/* Emits code that resets the match state: OVECTOR(2) .. OVECTOR(length - 1)
+are overwritten with the value held in OVECTOR(1), the mark and control head
+locals (when present) are cleared, STACK_TOP is reloaded from the base of
+the stack found in the jit_arguments, and TMP1 is loaded from
+common->start_ptr. length must be greater than 1. */
+static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+int i;
+
+SLJIT_ASSERT(length > 1);
+/* OVECTOR(1) contains the "string begin - 1" constant. */
+if (length > 2)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+if (length < 8)
+  {
+  for (i = 2; i < length; i++)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
+  }
+else
+  {
+  /* STACK_TOP is borrowed as the loop counter; it is reloaded below. */
+  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
+  loop = LABEL();
+  OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_C_NOT_ZERO, loop);
+  }
+
+/* STACK_TOP = arguments->stack->base, loaded in three steps. */
+OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
+if (common->control_head_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
+}
+
+/* Walks the chain of frames starting at current (each frame links to the
+next one through slot [-1]) looking for a type_mark frame whose name
+(slot [-3]) equals skip_arg. Returns slot [-4] of the first such frame, or
+-1 when the chain ends without a match. type_then_trap frames are skipped;
+any other frame type triggers SLJIT_ASSERT_STOP(). */
+static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
+{
+sljit_sw *frame;
+
+for (frame = current; frame != NULL; frame = (sljit_sw*)frame[-1])
+  {
+  if (frame[-2] == type_mark)
+    {
+    if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)frame[-3]) == 0)
+      return frame[-4];
+    }
+  else if (frame[-2] != type_then_trap)
+    {
+    SLJIT_ASSERT_STOP();
+    }
+  }
+return -1;
+}
+
+/* Emits the code that converts the internal ovector (pointers stored in the
+locals area) into integer offsets relative to the subject start and writes
+them to arguments->offsets. It also stores the mark pointer into the
+arguments (when marks are used) and leaves the return value in
+SLJIT_RETURN_REG: 1 when topbracket <= 1, otherwise a value computed by
+scanning the ovector downwards from topbracket. */
+static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+struct sljit_jump *early_quit;
+
+/* At this point we can freely use all registers. */
+/* Keep the old OVECTOR(1) value in SAVED_REG3 (compared against in the
+second loop), then store the match end (STR_PTR) in its place. */
+OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
+
+OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
+/* SCRATCH_REG3 = offsets - sizeof(int), because the store in the loop
+adds sizeof(int) to the address before writing. */
+OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
+OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
+GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
+/* Unlikely, but possible */
+early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
+loop = LABEL();
+/* SAVED_REG2 = current ovector pointer - subject begin. */
+OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
+OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
+/* Copy the integer value to the output buffer */
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+/* Convert the byte difference into a code unit count. */
+OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
+JUMPTO(SLJIT_C_NOT_ZERO, loop);
+JUMPHERE(early_quit);
+
+/* Calculate the return value, which is the maximum ovector value. */
+if (topbracket > 1)
+  {
+  GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
+
+  /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
+  /* Step down two slots at a time, decrementing the counter, while the
+  slot still holds the value saved in SAVED_REG3. */
+  loop = LABEL();
+  OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
+  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
+  CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
+  }
+else
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
+}
+
+/* Emits the exit path for a partial match. SLJIT_RETURN_REG is set to
+PCRE_ERROR_PARTIAL. If real_offset_count is less than 2 the code jumps to
+quit immediately; otherwise offsets[0] and offsets[1] are filled in (and
+offsets[2] too when real_offset_count is at least 3) before jumping to quit.
+All stored values are differences from the subject begin, converted to code
+units in the 16 and 32 bit libraries. */
+static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
+SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
+  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
+
+OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
+OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
+CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
+
+/* Store match begin and end. */
+OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
+OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
+
+/* offsets[2] is written only when at least three slots are available. */
+jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
+OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
+JUMPHERE(jump);
+
+/* offsets[1] = STR_END - begin. Note that this overwrites SAVED_REG2,
+which is STR_END itself (see the compile-time assert above). */
+OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
+OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
+
+/* offsets[0] = (start_used_ptr or hit_start, depending on mode) - begin. */
+OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
+
+JUMPTO(SLJIT_JUMP, quit);
+}
+
+/* Emits code that lowers the start_used_ptr local to STR_PTR when STR_PTR
+is below it. Nothing is emitted unless the mode is one of the two partial
+matching modes. */
+static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
+{
+/* May destroy TMP1. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  /* The value of -1 must be kept for start_used_ptr! */
+  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
+  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
+  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
+  jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
+  {
+  /* No -1 marker to preserve here: compare the local directly. */
+  jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+}
+
+/* Compile-time helper: returns TRUE when the character at cc differs from
+its other case. In UTF mode the character is decoded with GETCHAR first. */
+static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
+{
+/* Detects if the character has an othercase. */
+unsigned int c;
+
+#ifdef SUPPORT_UTF
+if (common->utf)
+  {
+  GETCHAR(c, cc);
+  if (c > 127)
+    {
+    /* Above ASCII only the Unicode property data can supply an other case. */
+#ifdef SUPPORT_UCP
+    return c != UCD_OTHERCASE(c);
+#else
+    return FALSE;
+#endif
+    }
+#ifndef COMPILE_PCRE8
+  /* c <= 127 here, so the fcc table lookup is in range. */
+  return common->fcc[c] != c;
+#endif
+  }
+else
+#endif
+  c = *cc;
+/* Characters above 255 are not covered by the fcc table. */
+return MAX_255(c) ? common->fcc[c] != c : FALSE;
+}
+
+/* Compile-time helper: returns the other case of character c. In UTF mode,
+characters above 127 use UCD_OTHERCASE when SUPPORT_UCP is defined and are
+returned unchanged otherwise; everything else goes through the fcc table
+via TABLE_GET with c itself as the fallback value. */
+static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
+{
+/* Returns with the othercase. */
+#ifdef SUPPORT_UTF
+if (common->utf && c > 127)
+  {
+#ifdef SUPPORT_UCP
+  return UCD_OTHERCASE(c);
+#else
+  return c;
+#endif
+  }
+#endif
+return TABLE_GET(c, common->fcc, c);
+}
+
+/* Compile-time helper for caseless matching. If the character at cc and its
+other case differ in exactly one bit, returns a value whose low 8 bits hold
+that bit (as a mask within one byte) and whose bits from 8 upwards hold an
+index selecting the byte the mask applies to. Returns 0 when the two
+characters differ in more than one bit. The caller must only pass
+characters that do have an other case (asserted below). */
+static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
+{
+/* Detects if the character and its othercase has only 1 bit difference. */
+unsigned int c, oc, bit;
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+int n;
+#endif
+
+#ifdef SUPPORT_UTF
+if (common->utf)
+  {
+  GETCHAR(c, cc);
+  if (c <= 127)
+    oc = common->fcc[c];
+  else
+    {
+#ifdef SUPPORT_UCP
+    oc = UCD_OTHERCASE(c);
+#else
+    oc = c;
+#endif
+    }
+  }
+else
+  {
+  c = *cc;
+  oc = TABLE_GET(c, common->fcc, c);
+  }
+#else
+c = *cc;
+oc = TABLE_GET(c, common->fcc, c);
+#endif
+
+SLJIT_ASSERT(c != oc);
+
+bit = c ^ oc;
+/* Optimized for English alphabet. */
+if (c <= 127 && bit == 0x20)
+  return (0 << 8) | 0x20;
+
+/* Since c != oc, they must have at least 1 bit difference. */
+if (!is_powerof2(bit))
+  return 0;
+
+#if defined COMPILE_PCRE8
+
+#ifdef SUPPORT_UTF
+if (common->utf && c > 127)
+  {
+  /* Start from the last byte of the UTF-8 sequence and move towards the
+  first one, six payload bits at a time, until the differing bit falls
+  inside the current byte. */
+  n = GET_EXTRALEN(*cc);
+  while ((bit & 0x3f) == 0)
+    {
+    n--;
+    bit >>= 6;
+    }
+  return (n << 8) | bit;
+  }
+#endif /* SUPPORT_UTF */
+return (0 << 8) | bit;
+
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+
+#ifdef SUPPORT_UTF
+if (common->utf && c > 65535)
+  {
+  /* A difference in bit 10 or above is shifted down by 10 and handled by
+  the common return below (byte index 0 or 1); a difference below bit 10
+  is reported with byte index 2 or 3. */
+  if (bit >= (1 << 10))
+    bit >>= 10;
+  else
+    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
+  }
+#endif /* SUPPORT_UTF */
+return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
+
+#endif /* COMPILE_PCRE[8|16|32] */
+}
+
+/* Emits the partial-match check used when the end of the subject has been
+reached. Nothing is emitted in JIT_COMPILE (non-partial) mode. When force is
+FALSE the check is skipped at run time if start_used_ptr >= STR_PTR; when
+force is TRUE it is skipped only in soft mode and only if start_used_ptr is
+-1. In soft mode the check stores 0 into hit_start; in hard mode it jumps to
+the partial match exit. */
+static void check_partial(compiler_common *common, BOOL force)
+{
+/* Checks whether a partial match has occurred. Does not modify registers. */
+DEFINE_COMPILER;
+struct sljit_jump *jump = NULL;
+
+SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
+
+if (common->mode == JIT_COMPILE)
+  return;
+
+if (!force)
+  jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
+
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+else
+  {
+  /* Hard mode: leave through the partial match exit, either directly if
+  its label already exists or through the pending jump list. */
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+
+if (jump != NULL)
+  JUMPHERE(jump);
+}
+
+/* Emits an end-of-subject test. In JIT_COMPILE mode a single jump is added
+to end_reached when STR_PTR >= STR_END. In the partial modes the test is
+skipped when STR_PTR < STR_END; otherwise the code goes to end_reached if
+start_used_ptr >= STR_PTR, and if not it either stores 0 into hit_start and
+goes to end_reached (soft mode) or jumps to the partial match exit (hard
+mode). */
+static void check_str_end(compiler_common *common, jump_list **end_reached)
+{
+/* Does not affect registers. Usually used in a tight spot. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == JIT_COMPILE)
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  return;
+  }
+
+jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
+  }
+else
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+JUMPHERE(jump);
+}
+
+/* Emits an end-of-subject test whose failure path is the backtracks list.
+In JIT_COMPILE mode a single jump is added to backtracks when
+STR_PTR >= STR_END. In the partial modes, when the end has been reached the
+code backtracks if start_used_ptr >= STR_PTR; otherwise it stores 0 into
+hit_start and backtracks (soft mode) or jumps to the partial match exit
+(hard mode). */
+static void detect_partial_match(compiler_common *common, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == JIT_COMPILE)
+  {
+  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  return;
+  }
+
+/* Partial matching mode. */
+jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
+add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+  }
+else
+  {
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+JUMPHERE(jump);
+}
+
+/* Emits code that loads the character at STR_PTR into TMP1 without
+consuming it. max is the largest character value the caller is interested
+in: in UTF mode, multi-unit decoding is skipped entirely when max is below
+the first value that needs more than one code unit (128 for UTF-8, 0xd800
+for UTF-16). */
+static void peek_char(compiler_common *common, pcre_uint32 max)
+{
+/* Reads the character into TMP1, keeps STR_PTR.
+Does not check STR_END. TMP2 Destroyed. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+struct sljit_jump *jump;
+#endif
+
+SLJIT_UNUSED_ARG(max);
+
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+if (common->utf)
+  {
+  if (max < 128) return;
+
+  jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+  /* The utfreadchar helper expects STR_PTR to point after the first byte
+  and returns the sequence length in TMP2, which is used to move STR_PTR
+  back to where it was. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+  JUMPHERE(jump);
+  }
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+if (common->utf)
+  {
+  if (max < 0xd800) return;
+
+  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+  /* TMP2 contains the high surrogate. */
+  /* STR_PTR still points at the high surrogate (it is not advanced when
+  peeking), so the low surrogate is the next code unit. */
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
+  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+  JUMPHERE(jump);
+  }
+#endif
+}
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+
+/* Inspects bytes 16..31 of a 32-byte class bitmap. Returns TRUE when all of
+them equal 0xff (nclass set) or 0 (nclass clear), FALSE otherwise. */
+static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
+{
+/* Tells whether the character codes below 128 are enough
+to determine a match. */
+const pcre_uint8 expected = nclass ? 0xff : 0;
+int i;
+
+for (i = 16; i < 32; i++)
+  {
+  if (bitset[i] != expected)
+    return FALSE;
+  }
+return TRUE;
+}
+
+/* Emits code that reads one code unit into TMP2, advances STR_PTR past it
+and loads the ctypes table entry for that unit into TMP1. When full_read is
+TRUE and the unit is a UTF-8 lead byte (>= 0xc0), STR_PTR is additionally
+advanced by the value looked up in utf8_table4. Only valid in UTF mode
+(asserted). */
+static void read_char7_type(compiler_common *common, BOOL full_read)
+{
+/* Reads the precise character type of a character into TMP1, if the character
+is less than 128. Otherwise it returns with zero. Does not check STR_END. The
+full_read argument tells whether characters above max are accepted or not. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+SLJIT_ASSERT(common->utf);
+
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+
+if (full_read)
+  {
+  jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
+  /* The table is indexed by (lead byte - 0xc0), hence the biased base. */
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+  JUMPHERE(jump);
+  }
+}
+
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
+/* Emits code that reads the next character into TMP1 and advances STR_PTR.
+The caller states the range [min, max] of values it cares about, which lets
+the generator pick a cheaper decoder in UTF mode. When update_str_ptr is
+TRUE, STR_PTR is moved past the whole character even if the character is
+not fully decoded; when FALSE, STR_PTR is only guaranteed to be past the
+code units that were actually decoded. TMP2 is used as scratch, and
+RETURN_ADDR is used as scratch in some UTF-8 paths. */
+static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
+{
+/* Reads the precise value of a character into TMP1, if the character is
+between min and max (c >= min && c <= max). Otherwise it returns with a value
+outside the range. Does not check STR_END. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+struct sljit_jump *jump;
+#endif
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+struct sljit_jump *jump2;
+#endif
+
+SLJIT_UNUSED_ARG(update_str_ptr);
+SLJIT_UNUSED_ARG(min);
+SLJIT_UNUSED_ARG(max);
+SLJIT_ASSERT(min <= max);
+
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+if (common->utf)
+  {
+  if (max < 128 && !update_str_ptr) return;
+
+  /* Bytes below 0xc0 are taken as they are; everything inside this block
+  runs only for lead bytes. */
+  jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+  if (min >= 0x10000)
+    {
+    /* Only four byte sequences (lead byte 0xf0..0xf7) are decoded. */
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
+    if (update_str_ptr)
+      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
+    if (!update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    JUMPHERE(jump2);
+    if (update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
+    }
+  else if (min >= 0x800 && max <= 0xffff)
+    {
+    /* Only three byte sequences (lead byte 0xe0..0xef) are decoded. */
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
+    if (update_str_ptr)
+      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    if (!update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    JUMPHERE(jump2);
+    if (update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
+    }
+  else if (max >= 0x800)
+    /* General case: call the shared decoder; the 16 bit variant is used
+    when no value of 0x10000 or above is of interest. */
+    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
+  else if (max < 128)
+    {
+    /* Reached only with update_str_ptr set (see the early return above):
+    skip the rest of the sequence without decoding it. */
+    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+    }
+  else
+    {
+    /* max is in 128..0x7ff: decode as a two byte sequence. */
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    if (!update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    else
+      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    if (update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
+    }
+  JUMPHERE(jump);
+  }
+#endif
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+if (common->utf)
+  {
+  if (max >= 0x10000)
+    {
+    /* Full decode: combine a surrogate pair into one character. */
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+    /* TMP2 contains the high surrogate. */
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    JUMPHERE(jump);
+    return;
+    }
+
+  if (max < 0xd800 && !update_str_ptr) return;
+
+  /* Skip low surrogate if necessary. */
+  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+  if (update_str_ptr)
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  /* The pair is not decoded; TMP1 gets 0x10000, a value above max. */
+  if (max >= 0xd800)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
+  JUMPHERE(jump);
+  }
+#endif
+}
+
+/* Emits code that reads a complete character into TMP1 and advances STR_PTR
+past it: read_char_range() over the full range 0 .. READ_CHAR_MAX with
+update_str_ptr set. */
+static SLJIT_INLINE void read_char(compiler_common *common)
+{
+read_char_range(common, 0, READ_CHAR_MAX, TRUE);
+}
+
+/* Emits code that reads the next character and loads its ctypes table entry
+into TMP1; characters above 255 yield 0 because the table has only 256
+entries. TMP2 is used as scratch. update_str_ptr selects, in UTF mode,
+whether STR_PTR must end up past the whole character. */
+static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
+{
+/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+struct sljit_jump *jump;
+#endif
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+struct sljit_jump *jump2;
+#endif
+
+SLJIT_UNUSED_ARG(update_str_ptr);
+
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+if (common->utf)
+  {
+  /* This can be an extra read in some situations, but hopefully
+  it is needed in most cases. */
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+  jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
+  if (!update_str_ptr)
+    {
+    /* Inline decode of the lead byte and one continuation byte; only
+    one extra byte is consumed. Results above 255 get type 0. */
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+    jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+    JUMPHERE(jump2);
+    }
+  else
+    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
+  JUMPHERE(jump);
+  return;
+  }
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
+#if !defined COMPILE_PCRE8
+/* The ctypes array contains only 256 values. */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
+#endif
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+#if !defined COMPILE_PCRE8
+JUMPHERE(jump);
+#endif
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+if (common->utf && update_str_ptr)
+  {
+  /* Skip low surrogate if necessary. */
+  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
+  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  JUMPHERE(jump);
+  }
+#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
+}
+
+/* Emits code that moves STR_PTR back by one character. Outside UTF mode
+(and in the 32 bit library) this is a single code unit; in UTF-8 mode the
+code loops backwards over continuation bytes, and in UTF-16 mode it steps
+over one additional unit when the preceding unit is a low surrogate. */
+static void skip_char_back(compiler_common *common)
+{
+/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#if defined COMPILE_PCRE8
+struct sljit_label *label;
+
+if (common->utf)
+  {
+  /* Repeat while the byte just stepped over has the form 10xxxxxx. */
+  label = LABEL();
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+  CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
+  return;
+  }
+#elif defined COMPILE_PCRE16
+if (common->utf)
+  {
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  /* Skip low surrogate if necessary. */
+  /* Branch-free: TMP1 becomes 1 if the unit was in 0xdc00..0xdfff, else 0,
+  and is then doubled to a byte count before being subtracted. */
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  return;
+  }
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+}
+
+/* Emits a newline test on the character held in TMP1. When jumpifmatch is
+TRUE a jump is added to backtracks if the character is a newline; when
+FALSE the jump is added if it is not. nltype selects the test: NLTYPE_ANY
+calls the shared anynewline routine, NLTYPE_ANYCRLF compares against CR and
+NL, and NLTYPE_FIXED compares against common->newline (which must be a
+single character below 256). */
+static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
+{
+/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (nltype == NLTYPE_ANY)
+  {
+  /* The conditional jump below tests the zero flag after the call. */
+  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
+  }
+else if (nltype == NLTYPE_ANYCRLF)
+  {
+  if (jumpifmatch)
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+    }
+  else
+    {
+    /* CR skips the NL comparison; anything that is neither backtracks. */
+    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+    JUMPHERE(jump);
+    }
+  }
+else
+  {
+  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
+  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
+  }
+}
+
+#ifdef SUPPORT_UTF
+
+#if defined COMPILE_PCRE8
+/* Emits the shared fast-call routine that decodes a two, three or four byte
+UTF-8 sequence. On entry TMP1 holds the lead byte and STR_PTR points at the
+byte after it; on return TMP1 holds the decoded character, TMP2 the length
+of the sequence, and STR_PTR points past the sequence. RETURN_ADDR holds
+the return address for the duration of the routine. */
+static void do_utfreadchar(compiler_common *common)
+{
+/* Fast decoding a UTF-8 character. TMP1 contains the first byte
+of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+/* Searching for the first zero. */
+/* Bit 0x800 is bit 5 of the lead byte after the shift: it is clear for a
+two byte lead (110xxxxx) and set for longer sequences. */
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+jump = JUMP(SLJIT_C_NOT_ZERO);
+/* Two byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(jump);
+/* Clear the length marker bit and merge the third byte. */
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+/* Bit 0x10000 is bit 4 of the lead byte at this point: set only for a
+four byte lead. */
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+jump = JUMP(SLJIT_C_NOT_ZERO);
+/* Three byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+/* Four byte sequence. */
+JUMPHERE(jump);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+static void do_utfreadchar16(compiler_common *common)
+{
+/* Emits a reduced variant of the UTF-8 decoder that only returns the
+character value. On entry TMP1 contains the first byte of the character
+(>= 0xc0) and STR_PTR addresses the byte that follows it. On return TMP1
+holds the value and STR_PTR has been advanced past the sequence; TMP2 is
+used as scratch and does not carry a length. Two and three byte sequences
+are decoded exactly. For a longer sequence one extra byte is skipped and
+the remaining bytes are combined as if the sequence had three bytes, so
+the result is not the exact code point.
+NOTE(review): presumably callers only need to know that such a value is
+above 0xffff - confirm against the call sites. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); /* Second byte of the sequence. */
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+/* Searching for the first zero. */
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); /* 0x800 is bit 0x20 of the first byte after the shift by 6. */
+jump = JUMP(SLJIT_C_NOT_ZERO);
+/* Two byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(jump);
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); /* 0x400 is bit 0x10 of the first byte: set for sequences longer than three bytes. */
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO); /* TMP2 = 1 for a longer sequence, 0 otherwise. */
+/* This code runs only in 8 bit mode. No need to shift the value. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* Skip one extra byte when the sequence is longer. */
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800); /* Drop the length marker bit before merging more data bits. */
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+/* Three byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+static void do_utfreadtype8(compiler_common *common)
+{
+/* Emits the fast-call helper that fetches the character type of a
+multi-byte UTF-8 character. On entry TMP2 contains the first byte of the
+character (>= 0xc0) and STR_PTR addresses the byte that follows it. On
+return TMP1 holds the entry of the common->ctypes table for characters
+below 256 and zero for every other character; STR_PTR has been advanced
+past the sequence. TMP2 is overwritten. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_jump *compare;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); /* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
+jump = JUMP(SLJIT_C_NOT_ZERO);
+/* Two byte sequence. */
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
+/* The upper 5 bits are known at this point. */
+compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3); /* More than 3 here means the character is 256 or above. */
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); /* Type byte of the decoded character. */
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(compare);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+/* We only have types for characters less than 256. */
+JUMPHERE(jump);
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* Table is indexed by (first byte - 0xc0); the value is added to STR_PTR below. */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#endif /* COMPILE_PCRE8 */
+
+#endif /* SUPPORT_UTF */
+
+#ifdef SUPPORT_UCP
+
+/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
+#define UCD_BLOCK_MASK 127
+#define UCD_BLOCK_SHIFT 7
+
+static void do_getucd(compiler_common *common)
+{
+/* Emits the fast-call helper that looks up the UCD record of the
+character passed in TMP1, using the two stage tables ucd_stage1 and
+ucd_stage2 with blocks of UCD_BLOCK_SIZE (128) characters.
+Returns chartype in TMP1 and UCD offset in TMP2. The offset is the record
+index read from ucd_stage2; it is scaled by the record size (8 bytes, see
+the assert below) only inside the final load. */
+DEFINE_COMPILER;
+
+SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); /* Block number of the character. */
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); /* Stage 1 entry, loaded as an unsigned byte. */
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); /* Position of the character inside its block. */
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); /* Index into stage 2. */
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
+OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); /* Index shifted by 1: stage 2 entries are read as 16 bit values. */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); /* Index shifted by 3: records are 8 bytes long. */
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+#endif
+
+static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
+{ /* Emits the entry sequence of the matching loop and returns the label
+of the code that moves STR_PTR forward to the next starting position.
+The emitted code consists of:
+  1. when firstline is set, a search for the end of the first line whose
+     result is stored in the local variable common->first_line_end
+     (STR_PTR is preserved through TMP3);
+  2. an unconditional jump over the advancing code, so the first attempt
+     starts at the original position;
+  3. the advancing code itself (the returned label), which steps over one
+     character: one code unit, plus the rest of a multi unit character in
+     UTF-8 / UTF-16 mode, plus the second character of a two character
+     newline when newlinecheck is enabled.
+Execution continues after the code emitted here in all cases. */
+DEFINE_COMPILER;
+struct sljit_label *mainloop;
+struct sljit_label *newlinelabel = NULL;
+struct sljit_jump *start;
+struct sljit_jump *end = NULL;
+struct sljit_jump *nl = NULL;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+struct sljit_jump *singlechar;
+#endif
+jump_list *newline = NULL;
+BOOL newlinecheck = FALSE;
+BOOL readuchar = FALSE;
+
+if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
+    common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
+  newlinecheck = TRUE; /* A newline may be two characters long: step over both at once. */
+
+if (firstline)
+  {
+  /* Search for the end of the first line. */
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); /* Saved here, restored after the search. */
+
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    /* Fixed two character newline: compare the previous and the current
+    code unit with the two halves of common->newline. */
+    mainloop = LABEL();
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
+    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
+    JUMPHERE(end);
+    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Stored value is one code unit before STR_PTR. */
+    }
+  else
+    {
+    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+    mainloop = LABEL();
+    /* Continual stores does not cause data dependency. */
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
+    read_char_range(common, common->nlmin, common->nlmax, TRUE);
+    check_newlinechar(common, common->nltype, &newline, TRUE); /* Jumps collected in newline are bound after the final store below. */
+    CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
+    JUMPHERE(end);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); /* Reached only when the loop ran out of input. */
+    set_jumps(newline, LABEL());
+    }
+
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+  }
+
+start = JUMP(SLJIT_JUMP); /* Skips the advancing code; bound at the end of this function. */
+
+if (newlinecheck)
+  {
+  /* Reached from the main loop when the current character equals the
+  first character of the newline: step over it, and over the next
+  character as well if that one is the second character of the newline. */
+  newlinelabel = LABEL();
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = 1 when the second newline character follows. */
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  nl = JUMP(SLJIT_JUMP);
+  }
+
+mainloop = LABEL(); /* The label returned to the caller. */
+
+/* Increasing the STR_PTR here requires one less jump in the most common case. */
+#ifdef SUPPORT_UTF
+if (common->utf) readuchar = TRUE;
+#endif
+if (newlinecheck) readuchar = TRUE;
+
+if (readuchar)
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+
+if (newlinecheck)
+  CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#if defined COMPILE_PCRE8
+if (common->utf)
+  {
+  singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* Table is indexed by (first byte - 0xc0); the value is added to STR_PTR. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  JUMPHERE(singlechar);
+  }
+#elif defined COMPILE_PCRE16
+if (common->utf)
+  {
+  singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = 1 when the unit is in 0xd800..0xdbff. */
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); /* One 16 bit unit is two bytes. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  JUMPHERE(singlechar);
+  }
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
+JUMPHERE(start);
+
+if (newlinecheck)
+  {
+  JUMPHERE(end);
+  JUMPHERE(nl);
+  }
+
+return mainloop;
+}
+
+#define MAX_N_CHARS 16
+#define MAX_N_BYTES 8
+
+static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
+{
+/* Adds byte to a small set of bytes. The set is stored as a member count
+in bytes[0] followed by the members in bytes[1..count]. A count of 255
+marks a set that has overflowed; once that happens the set is never
+changed again. At most MAX_N_BYTES - 1 members are kept. */
+pcre_uint8 count = bytes[0];
+pcre_uint8 *slot;
+
+if (count == 255)
+  return;
+
+/* Nothing to do when the byte is already a member. An empty set simply
+skips this loop. */
+for (slot = bytes + count; slot > bytes; slot--)
+  if (*slot == byte)
+    return;
+
+/* No room for another member: mark the set as overflowed. */
+if (count >= MAX_N_BYTES - 1)
+  {
+  bytes[0] = 255;
+  return;
+  }
+
+count++;
+bytes[count] = byte;
+bytes[0] = count;
+}
+
+static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
+{
+/* Recursive function, which scans prefix literals. Starting at cc it
+walks the compiled pattern and records, for each leading character
+position n, what may appear there:
+  chars[2 * n]      the character value with every uncertain bit set
+                    (NOTACHAR on entry means nothing is recorded yet),
+  chars[2 * n + 1]  the mask of the bits that may differ,
+  bytes[MAX_N_BYTES * n]  a small set of the possible low bytes, kept by
+                    add_prefix_byte (255 in the first element means that
+                    the set is unknown or too large).
+At most max_chars positions are filled in. Alternatives and optional items
+are merged into the same positions by recursive calls, and the value
+returned by such a call becomes the new limit. Returns the number of
+positions consumed by this invocation. */
+BOOL last, any, caseless;
+int len, repeat, len_save, consumed = 0;
+pcre_uint32 chr, mask;
+pcre_uchar *alternative, *cc_save, *oc;
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+pcre_uchar othercase[8];
+#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
+pcre_uchar othercase[2];
+#else
+pcre_uchar othercase[1];
+#endif
+
+repeat = 1;
+while (TRUE)
+  {
+  last = TRUE; /* Stop after this item unless the case below clears it. */
+  any = FALSE; /* Set when the item matches a set of characters rather than a literal. */
+  caseless = FALSE;
+  switch (*cc)
+    {
+    case OP_CHARI:
+    caseless = TRUE; /* Fall through. */
+    case OP_CHAR:
+    last = FALSE;
+    cc++;
+    break;
+
+    case OP_SOD:
+    case OP_SOM:
+    case OP_SET_SOM:
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    case OP_EODN:
+    case OP_EOD:
+    case OP_CIRC:
+    case OP_CIRCM:
+    case OP_DOLL:
+    case OP_DOLLM:
+    /* Zero width assertions. */
+    cc++;
+    continue;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    cc = bracketend(cc); /* The whole assertion is skipped. */
+    continue;
+
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_POSPLUSI:
+    caseless = TRUE; /* Fall through. */
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_POSPLUS:
+    cc++; /* Only the first occurrence is recorded; last stays TRUE. */
+    break;
+
+    case OP_EXACTI:
+    caseless = TRUE; /* Fall through. */
+    case OP_EXACT:
+    repeat = GET2(cc, 1);
+    last = FALSE;
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_POSQUERYI:
+    caseless = TRUE; /* Fall through. */
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_POSQUERY:
+    len = 1;
+    cc++;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
+#endif
+    /* The character is optional: first record what follows it into the
+    same positions, then merge the character itself below. */
+    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
+    if (max_chars == 0)
+      return consumed;
+    last = FALSE;
+    break;
+
+    case OP_KET:
+    cc += 1 + LINK_SIZE;
+    continue;
+
+    case OP_ALT:
+    cc += GET(cc, 1); /* Jump over the alternative using its stored length. */
+    continue;
+
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRA:
+    case OP_BRAPOS:
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    /* Merge every further alternative into the same positions first, then
+    continue with the first alternative in this loop. */
+    alternative = cc + GET(cc, 1);
+    while (*alternative == OP_ALT)
+      {
+      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
+      if (max_chars == 0)
+        return consumed;
+      alternative += GET(alternative, 1);
+      }
+
+    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
+      cc += IMM2_SIZE;
+    cc += 1 + LINK_SIZE;
+    continue;
+
+    case OP_CLASS:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
+#endif
+    any = TRUE;
+    cc += 1 + 32 / sizeof(pcre_uchar); /* Opcode plus the 32 byte bitmap. */
+    break;
+
+    case OP_NCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc += 1 + 32 / sizeof(pcre_uchar); /* Opcode plus the 32 byte bitmap. */
+    break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc += GET(cc, 1);
+    break;
+#endif
+
+    case OP_DIGIT:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_WHITESPACE:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_WORDCHAR:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_NOT:
+    case OP_NOTI:
+    cc++;
+    /* Fall through. */
+    case OP_NOT_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+#ifdef SUPPORT_UCP
+    case OP_NOTPROP:
+    case OP_PROP:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc += 1 + 2;
+    break;
+#endif
+
+    case OP_TYPEEXACT:
+    repeat = GET2(cc, 1); /* Applied to the item handled by the next iteration. */
+    cc += 1 + IMM2_SIZE;
+    continue;
+
+    case OP_NOTEXACT:
+    case OP_NOTEXACTI:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    repeat = GET2(cc, 1);
+    cc += 1 + IMM2_SIZE + 1;
+    break;
+
+    default:
+    return consumed;
+    }
+
+  if (any)
+    {
+    /* Nothing is known about these positions: all bits may differ. */
+#if defined COMPILE_PCRE8
+    mask = 0xff;
+#elif defined COMPILE_PCRE16
+    mask = 0xffff;
+#elif defined COMPILE_PCRE32
+    mask = 0xffffffff;
+#else
+    SLJIT_ASSERT_STOP();
+#endif
+
+    do
+      {
+      chars[0] = mask;
+      chars[1] = mask;
+      bytes[0] = 255;
+
+      consumed++;
+      if (--max_chars == 0)
+        return consumed;
+      chars += 2;
+      bytes += MAX_N_BYTES;
+      }
+    while (--repeat > 0);
+
+    repeat = 1;
+    continue;
+    }
+
+  /* A literal character: cc points to its first code unit. */
+  len = 1;
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
+#endif
+
+  if (caseless && char_has_othercase(common, cc))
+    {
+#ifdef SUPPORT_UTF
+    if (common->utf)
+      {
+      GETCHAR(chr, cc);
+      /* Both cases must have the same encoded length, since they are
+      merged code unit by code unit below. */
+      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
+        return consumed;
+      }
+    else
+#endif
+      {
+      chr = *cc;
+      othercase[0] = TABLE_GET(chr, common->fcc, chr);
+      }
+    }
+  else
+    caseless = FALSE;
+
+  len_save = len;
+  cc_save = cc;
+  while (TRUE) /* One pass for each repetition of the character. */
+    {
+    oc = othercase;
+    do
+      {
+      chr = *cc;
+#ifdef COMPILE_PCRE32
+      if (SLJIT_UNLIKELY(chr == NOTACHAR))
+        return consumed;
+#endif
+      add_prefix_byte((pcre_uint8)chr, bytes);
+
+      mask = 0;
+      if (caseless)
+        {
+        add_prefix_byte((pcre_uint8)*oc, bytes);
+        mask = *cc ^ *oc; /* Bits in which the two cases differ. */
+        chr |= mask;
+        }
+
+#ifdef COMPILE_PCRE32
+      if (chars[0] == NOTACHAR && chars[1] == 0)
+#else
+      if (chars[0] == NOTACHAR)
+#endif
+        {
+        /* First value recorded for this position. */
+        chars[0] = chr;
+        chars[1] = mask;
+        }
+      else
+        {
+        /* Merge with the value recorded earlier: every bit in which the
+        two values differ becomes uncertain as well. */
+        mask |= chars[0] ^ chr;
+        chr |= mask;
+        chars[0] = chr;
+        chars[1] |= mask;
+        }
+
+      len--;
+      consumed++;
+      if (--max_chars == 0)
+        return consumed;
+      chars += 2;
+      bytes += MAX_N_BYTES;
+      cc++;
+      oc++;
+      }
+    while (len > 0);
+
+    if (--repeat == 0)
+      break;
+
+    len = len_save;
+    cc = cc_save;
+    }
+
+  repeat = 1;
+  if (last)
+    return consumed;
+  }
+}
+
+static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
+{ /* Emits a loop that moves STR_PTR forward to the next position where
+the first few characters of the subject are compatible with the prefix
+information collected by scan_prefix. Two kinds of check may be emitted:
+  - a table driven skip: the low byte at a fixed offset selects an entry
+    of a 256 byte table stored in common->read_only_data, and that entry
+    is added to STR_PTR;
+  - direct comparisons of up to three characters, optionally OR-ed with a
+    mask of at most two bits first.
+Returns FALSE when nothing was emitted. Returns TRUE otherwise; this
+includes the case where allocating the table failed, which leaves
+common->read_only_data set to NULL and emits no code.
+NOTE(review): the caller is presumably expected to detect that failure
+through common->read_only_data - confirm at the call site. */
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+pcre_uint32 chars[MAX_N_CHARS * 2]; /* (value, mask) pairs, see scan_prefix. */
+pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES]; /* One byte set for each position. */
+pcre_uint8 ones[MAX_N_CHARS]; /* Number of bits set in each mask. */
+int offsets[3]; /* Positions compared directly: [0] first, [1] last, [2] middle; -1 if unused. */
+pcre_uint32 mask;
+pcre_uint8 *byte_set, *byte_set_end;
+int i, max, from;
+int range_right = -1, range_len = 3 - 1; /* A range is only accepted when it is longer than the initial range_len. */
+sljit_ub *update_table = NULL;
+BOOL in_range;
+
+/* This is even TRUE, if both are NULL. */
+SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
+
+for (i = 0; i < MAX_N_CHARS; i++)
+  {
+  chars[i << 1] = NOTACHAR;
+  chars[(i << 1) + 1] = 0;
+  bytes[i * MAX_N_BYTES] = 0;
+  }
+
+max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
+
+if (max <= 1)
+  return FALSE;
+
+/* Count the uncertain bits of every position. */
+for (i = 0; i < max; i++)
+  {
+  mask = chars[(i << 1) + 1];
+  ones[i] = ones_in_half_byte[mask & 0xf];
+  mask >>= 4;
+  while (mask != 0)
+    {
+    ones[i] += ones_in_half_byte[mask & 0xf];
+    mask >>= 4;
+    }
+  }
+
+/* Find the longest run of positions with a known byte set whose last
+position has at most four possible bytes. The extra iteration with
+i == max closes a run that extends to the last position. */
+in_range = FALSE;
+from = 0;   /* Prevent compiler "uninitialized" warning */
+for (i = 0; i <= max; i++)
+  {
+  if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
+    {
+    range_len = i - from;
+    range_right = i - 1;
+    }
+
+  if (i < max && bytes[i * MAX_N_BYTES] < 255)
+    {
+    if (!in_range)
+      {
+      in_range = TRUE;
+      from = i;
+      }
+    }
+  else if (in_range)
+    in_range = FALSE;
+  }
+
+if (range_right >= 0)
+  {
+  /* Since no data is consumed (see the assert in the beginning
+  of this function), this space can be reallocated. */
+  if (common->read_only_data)
+    SLJIT_FREE(common->read_only_data);
+
+  common->read_only_data_size += 256;
+  common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
+  if (common->read_only_data == NULL)
+    return TRUE;
+
+  update_table = (sljit_ub *)common->read_only_data;
+  common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
+  memset(update_table, IN_UCHARS(range_len), 256); /* Default: skip the whole range. */
+
+  /* For every byte that may occur inside the range, store the distance
+  (in bytes) from its rightmost possible position to range_right. */
+  for (i = 0; i < range_len; i++)
+    {
+    byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
+    SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
+    byte_set_end = byte_set + byte_set[0];
+    byte_set++;
+    while (byte_set <= byte_set_end)
+      {
+      if (update_table[*byte_set] > IN_UCHARS(i))
+        update_table[*byte_set] = IN_UCHARS(i);
+      byte_set++;
+      }
+    }
+  }
+
+offsets[0] = -1;
+/* Scan forward. */
+for (i = 0; i < max; i++)
+  if (ones[i] <= 2) {
+    offsets[0] = i;
+    break;
+  }
+
+if (offsets[0] < 0 && range_right < 0)
+  return FALSE;
+
+if (offsets[0] >= 0)
+  {
+  /* Scan backward. */
+  offsets[1] = -1;
+  for (i = max - 1; i > offsets[0]; i--)
+    if (ones[i] <= 2 && i != range_right)
+      {
+      offsets[1] = i;
+      break;
+      }
+
+  /* This case is handled better by fast_forward_first_char. */
+  if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
+    return FALSE;
+
+  offsets[2] = -1;
+  /* We only search for a middle character if there is no range check. */
+  if (offsets[1] >= 0 && range_right == -1)
+    {
+    /* Scan from middle. */
+    for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
+      if (ones[i] <= 2)
+        {
+        offsets[2] = i;
+        break;
+        }
+
+    if (offsets[2] == -1)
+      {
+      for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
+        if (ones[i] <= 2)
+          {
+          offsets[2] = i;
+          break;
+          }
+      }
+    }
+
+  SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
+  SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
+
+  /* Move the selected pairs to fixed slots: chars[0..1] for offsets[0],
+  chars[2..3] for offsets[2] and chars[4..5] for offsets[1]. */
+  chars[0] = chars[offsets[0] << 1];
+  chars[1] = chars[(offsets[0] << 1) + 1];
+  if (offsets[2] >= 0)
+    {
+    chars[2] = chars[offsets[2] << 1];
+    chars[3] = chars[(offsets[2] << 1) + 1];
+    }
+  if (offsets[1] >= 0)
+    {
+    chars[4] = chars[offsets[1] << 1];
+    chars[5] = chars[(offsets[1] << 1) + 1];
+    }
+  }
+
+/* STR_END is lowered by max - 1 characters while the loop runs. With
+firstline it is additionally limited to the stored end of the first line,
+and its original value is kept in TMP3. */
+max -= 1;
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+  quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
+  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
+  JUMPHERE(quit);
+  }
+else
+  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+
+#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+if (range_right >= 0)
+  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); /* Table address is kept in a register during the loop. */
+#endif
+
+start = LABEL();
+quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+
+SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
+
+if (range_right >= 0)
+  {
+  /* Load a single byte of the character at offset range_right; the second
+  form addresses the last byte of that character instead of the first. */
+#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
+#else
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
+#endif
+
+#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
+#else
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); /* A non-zero skip means no match here: try again. */
+  }
+
+if (offsets[0] >= 0)
+  {
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
+  if (offsets[1] >= 0)
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Advanced before the compares, so a mismatch can jump straight to start. */
+
+  if (chars[1] != 0)
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
+  if (offsets[2] >= 0)
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1)); /* -1 compensates for the increment above. */
+
+  if (offsets[1] >= 0)
+    {
+    if (chars[5] != 0)
+      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
+    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
+    }
+
+  if (offsets[2] >= 0)
+    {
+    if (chars[3] != 0)
+      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
+    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
+    }
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* All compares matched: undo the increment. */
+  }
+
+JUMPHERE(quit);
+
+/* Restore STR_END. With firstline and a table driven skip, STR_PTR is
+also limited to the stored end of the first line. */
+if (firstline)
+  {
+  if (range_right >= 0)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+  if (range_right >= 0)
+    {
+    quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
+    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+    JUMPHERE(quit);
+    }
+  }
+else
+  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+return TRUE;
+}
+
+#undef MAX_N_CHARS
+#undef MAX_N_BYTES
+
+static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+struct sljit_jump *found;
+pcre_uchar oc, bit;
+
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  }
+
+start = LABEL();
+quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+
+oc = first_char;
+if (caseless)
+  {
+  oc = TABLE_GET(first_char, common->fcc, first_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+  if (first_char > 127 && common->utf)
+    oc = UCD_OTHERCASE(first_char);
+#endif
+  }
+if (first_char == oc)
+  found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
+else
+  {
+  bit = first_char ^ oc;
+  if (is_powerof2(bit))
+    {
+    OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
+    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
+    }
+  else
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
+    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+    found = JUMP(SLJIT_C_NOT_ZERO);
+    }
+  }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_JUMP, start);
+JUMPHERE(found);
+JUMPHERE(quit);
+
+if (firstline)
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
+
+static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
+{ /* Emits code that moves STR_PTR forward to the position that follows
+the next newline. Nothing is skipped when STR_PTR is not after the str
+field of the jit_arguments structure. With firstline, STR_END is replaced
+by the stored end of the first line while the search runs and restored
+from TMP3 afterwards. A fixed two character newline is handled by a
+dedicated loop; every other newline type uses read_char_range and
+check_newlinechar. */
+DEFINE_COMPILER;
+struct sljit_label *loop;
+struct sljit_jump *lastchar;
+struct sljit_jump *firstchar;
+struct sljit_jump *quit;
+struct sljit_jump *foundcr = NULL;
+struct sljit_jump *notfoundnl;
+jump_list *newline = NULL;
+
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  }
+
+if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+  {
+  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
+
+  /* Step back one code unit when at least two code units precede STR_PTR
+  (measured from the begin field), so that the loop below, which
+  inspects the two units before STR_PTR, starts one position earlier. */
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+  loop = LABEL();
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
+  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
+
+  JUMPHERE(quit);
+  JUMPHERE(firstchar);
+  JUMPHERE(lastchar);
+
+  if (firstline)
+    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+  return;
+  }
+
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
+skip_char_back(common);
+
+loop = LABEL();
+common->ff_newline_shortcut = loop; /* The loop label is also published through common. */
+
+read_char_range(common, common->nlmin, common->nlmax, TRUE);
+lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
+  foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+check_newlinechar(common, common->nltype, &newline, FALSE);
+set_jumps(newline, loop); /* The jumps collected by check_newlinechar go back to the loop. */
+
+if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
+  {
+  quit = JUMP(SLJIT_JUMP);
+  JUMPHERE(foundcr);
+  /* A CR was read: also step over a directly following NL, if any. */
+  notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  JUMPHERE(notfoundnl);
+  JUMPHERE(quit);
+  }
+JUMPHERE(lastchar);
+JUMPHERE(firstchar);
+
+if (firstline)
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
+
+static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
+
+static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
+{ /* Emits a loop that moves STR_PTR forward until the code unit it points
+to is accepted by start_bits, a bitmap holding one bit for each of the
+values 0..255, or until STR_END is reached. The test is generated by
+check_class_ranges when it can handle the bitmap; otherwise a direct
+bitmap lookup is emitted. With firstline, STR_END is replaced by the
+stored end of the first line while the loop runs; its original value is
+kept in RETURN_ADDR, since TMP3 is used by the UTF code below. */
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+struct sljit_jump *found = NULL;
+jump_list *matches = NULL;
+#ifndef COMPILE_PCRE8
+struct sljit_jump *jump;
+#endif
+
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  }
+
+start = LABEL();
+quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#ifdef SUPPORT_UTF
+if (common->utf)
+  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); /* Keep the code unit: TMP1 is overwritten by the test. */
+#endif
+
+if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches)) /* The nclass argument is the bit of value 255. */
+  {
+#ifndef COMPILE_PCRE8
+  /* Values above 255 are tested as 255. */
+  jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
+  JUMPHERE(jump);
+#endif
+  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); /* Bit index inside the byte. */
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); /* Byte index inside the bitmap. */
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+  found = JUMP(SLJIT_C_NOT_ZERO);
+  }
+
+/* Not accepted: step over the whole character and try again. */
+#ifdef SUPPORT_UTF
+if (common->utf)
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+#endif
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#ifdef SUPPORT_UTF
+#if defined COMPILE_PCRE8
+if (common->utf)
+  {
+  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* Table is indexed by (first byte - 0xc0); the value is added to STR_PTR. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  }
+#elif defined COMPILE_PCRE16
+if (common->utf)
+  {
+  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = 1 when the unit is in 0xd800..0xdbff. */
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  }
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF */
+JUMPTO(SLJIT_JUMP, start);
+if (found != NULL)
+  JUMPHERE(found);
+if (matches != NULL)
+  set_jumps(matches, LABEL());
+JUMPHERE(quit);
+
+if (firstline)
+  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
+}
+
+static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
+{ /* Emits a forward scan of the subject for req_char (and, when caseless is
+   set and the character has a different other case, for that other case as
+   well). The address where the character was found is cached in the local
+   slot at common->req_char_ptr. Returns the jump that is taken when the scan
+   reaches STR_END without a hit; it is left unbound here, so the caller has
+   to bind it. The emitted code overwrites TMP1 and TMP2. */
+DEFINE_COMPILER;
+struct sljit_label *loop;
+struct sljit_jump *toolong;
+struct sljit_jump *alreadyfound;
+struct sljit_jump *found;
+struct sljit_jump *foundoc = NULL;
+struct sljit_jump *notfound;
+pcre_uint32 oc, bit;
+
+SLJIT_ASSERT(common->req_char_ptr != 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr); /* TMP2 = cached position. */
+OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
+toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); /* No scan when STR_PTR + REQ_BYTE_MAX is still below STR_END. */
+alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); /* No scan when the cached position is ahead of STR_PTR. */
+
+if (has_firstchar) /* Start one character after STR_PTR when has_firstchar is set. */
+  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+else
+  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
+
+loop = LABEL();
+notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
+
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
+oc = req_char;
+if (caseless)
+  {
+  oc = TABLE_GET(req_char, common->fcc, req_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+  if (req_char > 127 && common->utf)
+    oc = UCD_OTHERCASE(req_char);
+#endif
+  }
+if (req_char == oc)
+  found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+else
+  {
+  bit = req_char ^ oc;
+  if (is_powerof2(bit))
+    { /* The two cases differ in a single bit: force that bit and compare once. */
+    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
+    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
+    }
+  else
+    { /* Otherwise compare against both cases separately. */
+    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+    foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
+    }
+  }
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_JUMP, loop);
+
+JUMPHERE(found);
+if (foundoc)
+  JUMPHERE(foundoc);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0); /* Cache the hit position. */
+JUMPHERE(alreadyfound);
+JUMPHERE(toolong);
+return notfound;
+}
+
+static void do_revertframes(compiler_common *common)
+{ /* Emits a helper that is entered by fast call (return address kept in
+   RETURN_ADDR). It walks records starting at STACK_TOP; each record begins
+   with a tag word:
+     tag > 0: the next two words are copied to TMP3 + tag and the following
+              word, and the walk advances by three words;
+     tag < 0: the next word is copied to TMP3 - tag, and the walk advances
+              by two words;
+     tag = 0: the helper returns.
+   TMP3 is set by GET_LOCAL_BASE (presumably the base of the locals area -
+   confirm against its definition). TMP1, TMP2 and TMP3 are overwritten. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *mainloop;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
+GET_LOCAL_BASE(TMP3, 0, 0);
+
+/* Drop frames until we reach STACK_TOP. */
+mainloop = LABEL();
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0); /* TMP2 = tag of the current record. */
+OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
+jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
+
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); /* Positive tag: two-word record payload. */
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+JUMPTO(SLJIT_JUMP, mainloop);
+
+JUMPHERE(jump);
+jump = JUMP(SLJIT_C_SIG_LESS); /* Reuses the flags of the comparison above: negative tag. */
+/* End of dropping frames. */
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(jump);
+OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); /* Negative tag: one-word record payload. */
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
+JUMPTO(SLJIT_JUMP, mainloop);
+}
+
+static void check_wordboundary(compiler_common *common)
+{ /* Emits the helper that compares the "word character" status of the
+   character before STR_PTR with that of the character at STR_PTR. It is
+   entered by fast call; the return address is kept in LOCALS0. The status
+   bit of the previous character is stored in LOCALS1 (it stays 0 when
+   STR_PTR is at or before args->begin), the status bit of the current
+   character is built in TMP2 (it stays 0 when a jump collected by
+   check_str_end() is taken), and the final XOR of the two sets the flags
+   (SLJIT_SET_E) for the caller. TMP1 and TMP2 are overwritten. */
+DEFINE_COMPILER;
+struct sljit_jump *skipread;
+jump_list *skipread_list = NULL;
+#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
+struct sljit_jump *jump;
+#endif
+
+SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); /* The shifts by 4 below rely on this. */
+
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+/* Get type of the previous char, and put it to LOCALS1. */
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
+skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); /* No previous character available. */
+skip_char_back(common);
+check_start_used_ptr(common);
+read_char(common);
+
+/* Testing char type. */
+#ifdef SUPPORT_UCP
+if (common->use_ucp)
+  { /* Underscore, or a type in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No, gives 1.
+     The type is taken from TMP1 after the getucd call (presumably getucd
+     leaves the character type there - confirm against do_getucd). */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
+  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+  JUMPHERE(jump);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
+  }
+else
+#endif
+  { /* Table lookup: bit 4 (ctype_word) of the ctypes entry. Characters above
+     255 skip the lookup, so LOCALS1 keeps its zero value for them. */
+#ifndef COMPILE_PCRE8
+  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#elif defined SUPPORT_UTF
+  /* Here LOCALS1 has already been zeroed. */
+  jump = NULL;
+  if (common->utf)
+    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#endif /* COMPILE_PCRE8 */
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
+#ifndef COMPILE_PCRE8
+  JUMPHERE(jump);
+#elif defined SUPPORT_UTF
+  if (jump != NULL)
+    JUMPHERE(jump);
+#endif /* COMPILE_PCRE8 */
+  }
+JUMPHERE(skipread);
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); /* Default status of the current character. */
+check_str_end(common, &skipread_list);
+peek_char(common, READ_CHAR_MAX);
+
+/* Testing char type. This is a code duplication. */
+#ifdef SUPPORT_UCP
+if (common->use_ucp)
+  { /* Same test as above; the result stays in TMP2. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
+  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+  JUMPHERE(jump);
+  }
+else
+#endif
+  {
+#ifndef COMPILE_PCRE8
+  /* TMP2 may be destroyed by peek_char. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
+  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#elif defined SUPPORT_UTF
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
+  jump = NULL;
+  if (common->utf)
+    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#endif
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
+  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
+  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+#ifndef COMPILE_PCRE8
+  JUMPHERE(jump);
+#elif defined SUPPORT_UTF
+  if (jump != NULL)
+    JUMPHERE(jump);
+#endif /* COMPILE_PCRE8 */
+  }
+set_jumps(skipread_list, LABEL());
+
+OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); /* Flags only; the XOR result itself is discarded. */
+sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+}
+
+static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
+{ /* Tries to replace a 256-bit class bitmap test by a few range comparisons
+   on TMP1. The bitmap is scanned and every position where the bit value
+   changes is recorded in ranges[]; depending on the final bit value and
+   nclass, 256 is appended as a closing boundary. When more than four
+   boundaries are needed (or ranges[] would overflow) nothing is emitted and
+   FALSE is returned, so the caller has to emit a bitmap lookup instead.
+   Otherwise comparisons are emitted whose jumps are appended to *backtracks
+   and are taken for characters whose class bit (flipped when invert is set)
+   is 0, and TRUE is returned. The emitted code may modify TMP1. */
+DEFINE_COMPILER;
+int ranges[MAX_RANGE_SIZE];
+pcre_uint8 bit, cbit, all;
+int i, byte, length = 0;
+
+bit = bits[0] & 0x1;
+/* All bits will be zero or one (since bit is zero or one). */
+all = -bit;
+
+for (i = 0; i < 256; )
+  {
+  byte = i >> 3;
+  if ((i & 0x7) == 0 && bits[byte] == all) /* Whole byte continues the current run: skip 8 bits at once. */
+    i += 8;
+  else
+    {
+    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
+    if (cbit != bit)
+      { /* Bit value changes at i: record a boundary. */
+      if (length >= MAX_RANGE_SIZE)
+        return FALSE;
+      ranges[length] = i;
+      length++;
+      bit = cbit;
+      all = -cbit;
+      }
+    i++;
+    }
+  }
+
+if (((bit == 0) && nclass) || ((bit == 1) && !nclass)) /* Close the last run at 256. */
+  {
+  if (length >= MAX_RANGE_SIZE)
+    return FALSE;
+  ranges[length] = 256;
+  length++;
+  }
+
+if (length < 0 || length > 4) /* The emitters below handle at most four boundaries. */
+  return FALSE;
+
+bit = bits[0] & 0x1; /* From here on: the (possibly inverted) bit value of character 0. */
+if (invert) bit ^= 0x1;
+
+/* No character is accepted. */
+if (length == 0 && bit == 0)
+  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+
+switch(length)
+  {
+  case 0:
+  /* When bit != 0, all characters are accepted. */
+  return TRUE;
+
+  case 1:
+  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+  return TRUE;
+
+  case 2:
+  if (ranges[0] + 1 != ranges[1])
+    { /* One range: subtract its start, then a single unsigned compare. */
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+    }
+  else /* The range is a single character. */
+    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+  return TRUE;
+
+  case 3:
+  if (bit != 0)
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
+    if (ranges[0] + 1 != ranges[1])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+      }
+    else
+      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+    return TRUE;
+    }
+
+  add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
+  if (ranges[1] + 1 != ranges[2])
+    {
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
+  return TRUE;
+
+  case 4:
+  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
+      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
+      && is_powerof2(ranges[2] - ranges[0]))
+    { /* Two ranges of equal length whose starts differ in exactly one bit:
+       OR that bit into TMP1 and test the upper range only. */
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
+    if (ranges[2] + 1 != ranges[3])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
+      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
+      }
+    else
+      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
+    return TRUE;
+    }
+
+  if (bit != 0)
+    {
+    i = 0; /* i tracks how much has already been subtracted from TMP1. */
+    if (ranges[0] + 1 != ranges[1])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+      i = ranges[0];
+      }
+    else
+      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+
+    if (ranges[2] + 1 != ranges[3])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
+      }
+    else
+      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
+    return TRUE;
+    }
+
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
+  if (ranges[1] + 1 != ranges[2])
+    {
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+  return TRUE;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  return FALSE;
+  }
+}
+
+static void check_anynewline(compiler_common *common)
+{
+/* Check whether TMP1 contains a newline character. TMP2 destroyed.
+   Emitted as a fast-call helper (return address in RETURN_ADDR). The
+   characters tested are 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
+   16/32-bit builds or when common->utf is set in the 8-bit build. The
+   result is accumulated in TMP2 and the final OP_FLAGS sets the flags from
+   it. TMP1 is modified as well: 0x0a is subtracted from it, and its lowest
+   bit is set on the wide/UTF path. */
+DEFINE_COMPILER;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); /* One unsigned compare covers 0x0a-0x0d. */
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+#ifdef COMPILE_PCRE8
+if (common->utf)
+  {
+#endif
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* Setting bit 0 lets one compare match both 0x2028 and 0x2029. */
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+#ifdef COMPILE_PCRE8
+  }
+#endif
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
+OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+static void check_hspace(compiler_common *common)
+{
+/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
+   Emitted as a fast-call helper (return address in RETURN_ADDR). The
+   characters tested are 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
+   0x2000-0x200a, 0x202f, 0x205f and 0x3000 in 16/32-bit builds or when
+   common->utf is set in the 8-bit build. The result is accumulated in TMP2
+   and the final OP_FLAGS sets the flags from it. On the wide/UTF path TMP1
+   is modified as well (0x2000 is subtracted from it). */
+DEFINE_COMPILER;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
+OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+#ifdef COMPILE_PCRE8
+if (common->utf)
+  {
+#endif
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); /* The remaining constants are relative to 0x2000. */
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
+#ifdef COMPILE_PCRE8
+  }
+#endif
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
+OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+static void check_vspace(compiler_common *common)
+{
+/* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
+   Emitted as a fast-call helper (return address in RETURN_ADDR). The
+   characters tested are 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
+   16/32-bit builds or when common->utf is set in the 8-bit build. The
+   result is accumulated in TMP2 and the final OP_FLAGS sets the flags from
+   it. TMP1 is modified as well: 0x0a is subtracted from it, and its lowest
+   bit is set on the wide/UTF path. */
+DEFINE_COMPILER;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); /* One unsigned compare covers 0x0a-0x0d. */
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+#ifdef COMPILE_PCRE8
+if (common->utf)
+  {
+#endif
+  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); /* Setting bit 0 lets one compare match both 0x2028 and 0x2029. */
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+#ifdef COMPILE_PCRE8
+  }
+#endif
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
+OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#define CHAR1 STR_END /* Scratch alias: the compare helpers save and restore the aliased register. */
+#define CHAR2 STACK_TOP /* Scratch alias: the compare helpers save and restore the aliased register. */
+
+static void do_casefulcmp(compiler_common *common)
+{ /* Emits a fast-call helper (return address in RETURN_ADDR) that compares
+   two character sequences unit by unit. As read from the emitted code, on
+   entry TMP1 addresses the first sequence, TMP2 holds the length that is
+   counted down by IN_UCHARS(1) per unit, and STR_PTR is TMP2 past the start
+   of the second sequence. The loop stops at the first differing unit or
+   when TMP2 reaches zero. CHAR1 and CHAR2 are preserved through TMP3 and
+   LOCALS0; TMP1, TMP2, TMP3 and STR_PTR are changed. How the caller reads
+   the outcome (presumably from the flags) is not visible here - confirm at
+   the call site. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *label;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); /* Both pointers start one unit early; the loads below use a +1 unit offset */
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* (MOVU_UCHAR presumably also updates the base register - confirm). */
+
+label = LABEL();
+OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
+OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_C_NOT_ZERO, label);
+
+JUMPHERE(jump);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Undo the initial one-unit bias. */
+OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
+OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#define LCC_TABLE STACK_LIMIT /* Scratch alias: holds common->lcc inside the helper; saved in TMP3 and restored. */
+
+static void do_caselesscmp(compiler_common *common)
+{ /* Emits a fast-call helper (return address in RETURN_ADDR) that compares
+   two character sequences unit by unit after mapping each unit through the
+   common->lcc table. In builds other than the 8-bit one, units above 255
+   are compared unmapped. As read from the emitted code, on entry TMP1
+   addresses the first sequence, TMP2 holds the length that is counted down
+   by IN_UCHARS(1) per unit, and STR_PTR is TMP2 past the start of the
+   second sequence. The loop stops at the first differing unit or when TMP2
+   reaches zero. LCC_TABLE, CHAR1 and CHAR2 are preserved through TMP3,
+   LOCALS0 and LOCALS1; TMP1, TMP2, TMP3 and STR_PTR are changed. How the
+   caller reads the outcome (presumably from the flags) is not visible
+   here - confirm at the call site. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *label;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
+OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); /* Both pointers start one unit early; the loads below use a +1 unit offset. */
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+label = LABEL();
+OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+#ifndef COMPILE_PCRE8
+jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255); /* The table lookup is skipped for units above 255. */
+#endif
+OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
+#ifndef COMPILE_PCRE8
+JUMPHERE(jump);
+jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
+#endif
+OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
+#ifndef COMPILE_PCRE8
+JUMPHERE(jump);
+#endif
+jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
+OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_C_NOT_ZERO, label);
+
+JUMPHERE(jump);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Undo the initial one-unit bias. */
+OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
+OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#undef LCC_TABLE
+#undef CHAR1
+#undef CHAR2
+
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
+
+static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
+{
+/* Caseless comparison of the characters in [src1, end1) against the text
+starting at args->uchar_ptr and bounded by args->end. It is written as a
+plain C function because emitting it as JIT code would not be efficient.
+
+Returns:
+  the position reached in the second text when every character matched,
+  NULL at the first character that does not match,
+  (pcre_uchar*)1 when the second text ends before the first one does. */
+const pcre_uchar *subject = args->uchar_ptr;
+const pcre_uchar *subject_end = args->end;
+const ucd_record *record;
+const pcre_uint32 *caseset;
+pcre_uint32 wanted, actual;
+
+for (; src1 < end1; )
+  {
+  if (subject >= subject_end)
+    return (pcre_uchar*)1;
+
+  GETCHARINC(wanted, src1);
+  GETCHARINC(actual, subject);
+  record = GET_UCD(actual);
+
+  /* Identical, or equal to the simple other case: next character. */
+  if (wanted == actual || wanted == actual + record->other_case)
+    continue;
+
+  /* Otherwise the character has to be listed in the caseless set of the
+  subject character. Leaving the scan as soon as an entry is larger than
+  the wanted character relies on the entries being in ascending order. */
+  caseset = PRIV(ucd_caseless_sets) + record->caseset;
+  while (wanted != *caseset)
+    {
+    if (wanted < *caseset) return NULL;
+    caseset++;
+    }
+  }
+return subject;
+}
+
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
+
+static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
+    compare_context* context, jump_list **backtracks)
+{ /* Emits the comparison of the character at cc (all of its code units when
+   common->utf is set and the first unit announces extra units) with the
+   subject. The subject is addressed as STR_PTR - context->length, and
+   context carries the state from one call to the next:
+     length    - remaining displacement, reduced by IN_UCHARS(1) per unit;
+     sourcereg - register that receives the next load (-1 before the first
+                 call); TMP1 and TMP2 are used alternately, so a load is
+                 always emitted one step ahead of the compare that uses it;
+     ucharptr, c, oc - units gathered for one merged compare when unaligned
+                 reads are available (8 and 16 bit builds only).
+   A caseless compare is done by OR-ing the single differing bit into the
+   loaded value. Mismatch jumps are appended to *backtracks. Returns cc
+   advanced past the compared character. */
+DEFINE_COMPILER;
+unsigned int othercasebit = 0;
+pcre_uchar *othercasechar = NULL;
+#ifdef SUPPORT_UTF
+int utflength;
+#endif
+
+if (caseless && char_has_othercase(common, cc))
+  {
+  othercasebit = char_get_othercase_bit(common, cc);
+  SLJIT_ASSERT(othercasebit);
+  /* Extracting bit difference info. */
+#if defined COMPILE_PCRE8
+  othercasechar = cc + (othercasebit >> 8); /* Upper part: which code unit holds the differing bit. */
+  othercasebit &= 0xff; /* Lower part: the bit mask itself. */
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  /* Note that this code only handles characters in the BMP. If there
+  ever are characters outside the BMP whose othercase differs in only one
+  bit from itself (there currently are none), this code will need to be
+  revised for COMPILE_PCRE32. */
+  othercasechar = cc + (othercasebit >> 9);
+  if ((othercasebit & 0x100) != 0) /* Flag 0x100: the mask applies to the upper byte of the unit. */
+    othercasebit = (othercasebit & 0xff) << 8;
+  else
+    othercasebit &= 0xff;
+#endif /* COMPILE_PCRE[8|16|32] */
+  }
+
+if (context->sourcereg == -1)
+  { /* First call for this context: emit the initial load into TMP1. */
+#if defined COMPILE_PCRE8
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  if (context->length >= 4)
+    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else if (context->length >= 2)
+    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else
+#endif
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#elif defined COMPILE_PCRE16
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  if (context->length >= 4)
+    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else
+#endif
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#elif defined COMPILE_PCRE32
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* COMPILE_PCRE[8|16|32] */
+  context->sourcereg = TMP2;
+  }
+
+#ifdef SUPPORT_UTF
+utflength = 1;
+if (common->utf && HAS_EXTRALEN(*cc))
+  utflength += GET_EXTRALEN(*cc);
+
+do
+  {
+#endif
+
+  context->length -= IN_UCHARS(1);
+#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
+
+  /* Unaligned read is supported. */
+  if (othercasebit != 0 && othercasechar == cc)
+    {
+    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
+    context->oc.asuchars[context->ucharptr] = othercasebit;
+    }
+  else
+    {
+    context->c.asuchars[context->ucharptr] = *cc;
+    context->oc.asuchars[context->ucharptr] = 0;
+    }
+  context->ucharptr++;
+
+#if defined COMPILE_PCRE8
+  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
+#else
+  if (context->ucharptr >= 2 || context->length == 0)
+#endif
+    { /* Enough units gathered: emit the load for the following chunk, then
+       compare the chunk that was loaded earlier. */
+    if (context->length >= 4)
+      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+    else if (context->length >= 2)
+      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#if defined COMPILE_PCRE8
+    else if (context->length >= 1)
+      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* COMPILE_PCRE8 */
+    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; /* Switch to the register loaded one step earlier. */
+
+    switch(context->ucharptr)
+      {
+      case 4 / sizeof(pcre_uchar):
+      if (context->oc.asint != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
+      break;
+
+      case 2 / sizeof(pcre_uchar):
+      if (context->oc.asushort != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
+      break;
+
+#ifdef COMPILE_PCRE8
+      case 1:
+      if (context->oc.asbyte != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
+      break;
+#endif
+
+      default:
+      SLJIT_ASSERT_STOP();
+      break;
+      }
+    context->ucharptr = 0;
+    }
+
+#else
+
+  /* Unaligned read is unsupported or in 32 bit mode. */
+  if (context->length >= 1)
+    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+
+  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; /* Switch to the register loaded one step earlier. */
+
+  if (othercasebit != 0 && othercasechar == cc)
+    {
+    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
+
+#endif
+
+  cc++;
+#ifdef SUPPORT_UTF
+  utflength--;
+  }
+while (utflength > 0);
+#endif
+
+return cc;
+}
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+
+#define SET_TYPE_OFFSET(value) /* Emits an ADD/SUB that moves typereg from bias typeoffset to bias (value). */ \
+  if ((value) != typeoffset) \
+    { \
+    if ((value) < typeoffset) \
+      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
+    else \
+      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
+    } \
+  typeoffset = (value); /* Expands to two statements and evaluates (value) several times: use only as a full statement with a side-effect-free argument. */
+
+#define SET_CHAR_OFFSET(value) /* Emits an ADD/SUB that moves TMP1 from bias charoffset to bias (value). */ \
+  if ((value) != charoffset) \
+    { \
+    if ((value) < charoffset) \
+      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
+    else \
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
+    } \
+  charoffset = (value); /* Expands to two statements and evaluates (value) several times: use only as a full statement with a side-effect-free argument. */
+
+static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+jump_list *found = NULL;
+jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
+sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
+struct sljit_jump *jump = NULL;
+pcre_uchar *ccbegin;
+int compares, invertcmp, numberofcmps;
+#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
+BOOL utf = common->utf;
+#endif
+
+#ifdef SUPPORT_UCP
+BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
+BOOL charsaved = FALSE;
+int typereg = TMP1, scriptreg = TMP1;
+const pcre_uint32 *other_cases;
+sljit_uw typeoffset;
+#endif
+
+/* Scanning the necessary info. */
+cc++;
+ccbegin = cc;
+compares = 0;
+if (cc[-1] & XCL_MAP)
+  {
+  min = 0;
+  cc += 32 / sizeof(pcre_uchar);
+  }
+
+while (*cc != XCL_END)
+  {
+  compares++;
+  if (*cc == XCL_SINGLE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+    if (c > max) max = c;
+    if (c < min) min = c;
+#ifdef SUPPORT_UCP
+    needschar = TRUE;
+#endif
+    }
+  else if (*cc == XCL_RANGE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+    if (c < min) min = c;
+    GETCHARINCTEST(c, cc);
+    if (c > max) max = c;
+#ifdef SUPPORT_UCP
+    needschar = TRUE;
+#endif
+    }
+#ifdef SUPPORT_UCP
+  else
+    {
+    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
+    cc++;
+    if (*cc == PT_CLIST)
+      {
+      other_cases = PRIV(ucd_caseless_sets) + cc[1];
+      while (*other_cases != NOTACHAR)
+        {
+        if (*other_cases > max) max = *other_cases;
+        if (*other_cases < min) min = *other_cases;
+        other_cases++;
+        }
+      }
+    else
+      {
+      max = READ_CHAR_MAX;
+      min = 0;
+      }
+
+    switch(*cc)
+      {
+      case PT_ANY:
+      break;
+
+      case PT_LAMP:
+      case PT_GC:
+      case PT_PC:
+      case PT_ALNUM:
+      needstype = TRUE;
+      break;
+
+      case PT_SC:
+      needsscript = TRUE;
+      break;
+
+      case PT_SPACE:
+      case PT_PXSPACE:
+      case PT_WORD:
+      case PT_PXGRAPH:
+      case PT_PXPRINT:
+      case PT_PXPUNCT:
+      needstype = TRUE;
+      needschar = TRUE;
+      break;
+
+      case PT_CLIST:
+      case PT_UCNC:
+      needschar = TRUE;
+      break;
+
+      default:
+      SLJIT_ASSERT_STOP();
+      break;
+      }
+    cc += 2;
+    }
+#endif
+  }
+
+/* We are not necessary in utf mode even in 8 bit mode. */
+cc = ccbegin;
+detect_partial_match(common, backtracks);
+read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
+
+if ((cc[-1] & XCL_HASPROP) == 0)
+  {
+  if ((cc[-1] & XCL_MAP) != 0)
+    {
+    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+    if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
+      {
+      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+      add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
+      }
+
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+    JUMPHERE(jump);
+
+    cc += 32 / sizeof(pcre_uchar);
+    }
+  else
+    {
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
+    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
+    }
+  }
+else if ((cc[-1] & XCL_MAP) != 0)
+  {
+  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+#ifdef SUPPORT_UCP
+  charsaved = TRUE;
+#endif
+  if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
+    {
+#ifdef COMPILE_PCRE8
+    SLJIT_ASSERT(common->utf);
+#endif
+    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+
+    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+    add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
+
+    JUMPHERE(jump);
+    }
+
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+  cc += 32 / sizeof(pcre_uchar);
+  }
+
+#ifdef SUPPORT_UCP
+/* Simple register allocation. TMP1 is preferred if possible. */
+if (needstype || needsscript)
+  {
+  if (needschar && !charsaved)
+    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  if (needschar)
+    {
+    if (needstype)
+      {
+      OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+      typereg = RETURN_ADDR;
+      }
+
+    if (needsscript)
+      scriptreg = TMP3;
+    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+    }
+  else if (needstype && needsscript)
+    scriptreg = TMP3;
+  /* In all other cases only one of them was specified, and that can goes to TMP1. */
+
+  if (needsscript)
+    {
+    if (scriptreg == TMP1)
+      {
+      OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
+      }
+    else
+      {
+      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
+      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
+      }
+    }
+  }
+#endif
+
+/* Generating code. */
+charoffset = 0;
+numberofcmps = 0;
+#ifdef SUPPORT_UCP
+typeoffset = 0;
+#endif
+
+while (*cc != XCL_END)
+  {
+  compares--;
+  invertcmp = (compares == 0 && list != backtracks);
+  jump = NULL;
+
+  if (*cc == XCL_SINGLE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+
+    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
+      numberofcmps++;
+      }
+    else if (numberofcmps > 0)
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      numberofcmps = 0;
+      }
+    else
+      {
+      jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      numberofcmps = 0;
+      }
+    }
+  else if (*cc == XCL_RANGE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+    SET_CHAR_OFFSET(c);
+    GETCHARINCTEST(c, cc);
+
+    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
+      numberofcmps++;
+      }
+    else if (numberofcmps > 0)
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      numberofcmps = 0;
+      }
+    else
+      {
+      jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      numberofcmps = 0;
+      }
+    }
+#ifdef SUPPORT_UCP
+  else
+    {
+    if (*cc == XCL_NOTPROP)
+      invertcmp ^= 0x1;
+    cc++;
+    switch(*cc)
+      {
+      case PT_ANY:
+      if (list != backtracks)
+        {
+        if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
+          continue;
+        }
+      else if (cc[-1] == XCL_NOTPROP)
+        continue;
+      jump = JUMP(SLJIT_JUMP);
+      break;
+
+      case PT_LAMP:
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_GC:
+      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
+      SET_TYPE_OFFSET(c);
+      jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
+      break;
+
+      case PT_PC:
+      jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
+      break;
+
+      case PT_SC:
+      jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
+      break;
+
+      case PT_SPACE:
+      case PT_PXSPACE:
+      SET_CHAR_OFFSET(9);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+      SET_TYPE_OFFSET(ucp_Zl);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_WORD:
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+      /* Fall through. */
+
+      case PT_ALNUM:
+      SET_TYPE_OFFSET(ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
+      SET_TYPE_OFFSET(ucp_Nd);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_CLIST:
+      other_cases = PRIV(ucd_caseless_sets) + cc[1];
+
+      /* At least three characters are required.
+         Otherwise this case would be handled by the normal code path. */
+      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
+      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
+
+      /* Optimizing character pairs, if their difference is power of 2. */
+      if (is_powerof2(other_cases[1] ^ other_cases[0]))
+        {
+        if (charoffset == 0)
+          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
+        else
+          {
+          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
+          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
+          }
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
+        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+        other_cases += 2;
+        }
+      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
+        {
+        if (charoffset == 0)
+          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
+        else
+          {
+          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
+          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
+          }
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
+        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
+        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+        other_cases += 3;
+        }
+      else
+        {
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+        }
+
+      while (*other_cases != NOTACHAR)
+        {
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+        }
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_UCNC:
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+      SET_CHAR_OFFSET(0xa0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+      SET_CHAR_OFFSET(0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_PXGRAPH:
+      /* C and Z groups are the farthest two groups. */
+      SET_TYPE_OFFSET(ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
+
+      jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+
+      /* In case of ucp_Cf, we overwrite the result. */
+      SET_CHAR_OFFSET(0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+      JUMPHERE(jump);
+      jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+      break;
+
+      case PT_PXPRINT:
+      /* C and Z groups are the farthest two groups. */
+      SET_TYPE_OFFSET(ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
+      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
+
+      jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+
+      /* In case of ucp_Cf, we overwrite the result. */
+      SET_CHAR_OFFSET(0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
+
+      JUMPHERE(jump);
+      jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+      break;
+
+      case PT_PXPUNCT:
+      SET_TYPE_OFFSET(ucp_Sc);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
+
+      SET_CHAR_OFFSET(0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
+      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+
+      SET_TYPE_OFFSET(ucp_Pc);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
+      jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
+      break;
+      }
+    cc += 2;
+    }
+#endif
+
+  if (jump != NULL)
+    add_jump(compiler, compares > 0 ? list : backtracks, jump);
+  }
+
+if (found != NULL)
+  set_jumps(found, LABEL());
+}
+
+#undef SET_TYPE_OFFSET
+#undef SET_CHAR_OFFSET
+
+#endif
+
+static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
+{ /* Emits the matching-path code for one opcode that tests a single item
+   (an anchor, a character type, a literal character, a class or a
+   step-back).
+
+   common     - shared compiler state (mode, newline settings, helper lists).
+   type       - the opcode to compile; it selects the case below.
+   cc         - the operand data that belongs to the opcode.
+   backtracks - list that collects the jumps taken when the item fails.
+
+   Returns cc advanced past the operand data used by the opcode; opcodes
+   without operand data return cc unchanged. An opcode that has no case
+   below reaches SLJIT_ASSERT_STOP(). */
+DEFINE_COMPILER;
+int length;
+unsigned int c, oc, bit;
+compare_context context;
+struct sljit_jump *jump[4];
+jump_list *end_list;
+#ifdef SUPPORT_UTF
+struct sljit_label *label;
+#ifdef SUPPORT_UCP
+pcre_uchar propdata[5];
+#endif
+#endif /* SUPPORT_UTF */
+
+switch(type)
+  {
+  case OP_SOD: /* Backtrack unless STR_PTR equals jit_arguments.begin. */
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
+  return cc;
+  /* Backtrack unless STR_PTR equals jit_arguments.str. */
+  case OP_SOM:
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
+  return cc;
+  /* Fast-calls the routine collected in common->wordboundary, then
+     backtracks on the flag state it leaves behind: non-zero for
+     OP_NOT_WORD_BOUNDARY, zero for OP_WORD_BOUNDARY. */
+  case OP_NOT_WORD_BOUNDARY:
+  case OP_WORD_BOUNDARY:
+  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
+  return cc;
+  /* Character type tests (digit, whitespace, word character): after the
+     read_char*_type() call TMP1 is masked with the ctype_* bit of the
+     class, and the positive opcode backtracks when the bit is clear. */
+  case OP_NOT_DIGIT:
+  case OP_DIGIT:
+  /* Digits are usually 0-9, so it is worth to optimize them. */
+  detect_partial_match(common, backtracks);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
+    read_char7_type(common, type == OP_NOT_DIGIT);
+  else
+#endif
+    read_char8_type(common, type == OP_NOT_DIGIT);
+  /* Flip the starting bit in the negative case. */
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
+  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
+  return cc;
+
+  case OP_NOT_WHITESPACE:
+  case OP_WHITESPACE:
+  detect_partial_match(common, backtracks);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
+    read_char7_type(common, type == OP_NOT_WHITESPACE);
+  else
+#endif
+    read_char8_type(common, type == OP_NOT_WHITESPACE);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
+  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
+  return cc;
+
+  case OP_NOT_WORDCHAR:
+  case OP_WORDCHAR:
+  detect_partial_match(common, backtracks);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
+    read_char7_type(common, type == OP_NOT_WORDCHAR);
+  else
+#endif
+    read_char8_type(common, type == OP_NOT_WORDCHAR);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
+  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
+  return cc;
+  /* Any character that is not a newline. A fixed newline with
+     common->newline > 255 is tested inline: the character just read is
+     compared with the high byte and, unless the subject ends here, the
+     next unit with the low byte; only a match of both backtracks. */
+  case OP_ANY:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, common->nlmin, common->nlmax, TRUE);
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+    end_list = NULL;
+    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
+      add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+    else
+      check_str_end(common, &end_list);
+
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
+    set_jumps(end_list, LABEL());
+    JUMPHERE(jump[0]);
+    }
+  else
+    check_newlinechar(common, common->nltype, backtracks, TRUE);
+  return cc;
+  /* Consumes one character without testing it. In UTF mode STR_PTR is
+     advanced further for lead units >= 0xc0 (8-bit, by the utf8_table4
+     entry of the lead unit) or for units in 0xd800-0xdbff (16-bit). */
+  case OP_ALLANY:
+  detect_partial_match(common, backtracks);
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
+#if defined COMPILE_PCRE8
+    jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#elif defined COMPILE_PCRE16
+    jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#endif
+    JUMPHERE(jump[0]);
+#endif /* COMPILE_PCRE[8|16] */
+    return cc;
+    }
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  return cc;
+  /* Advances STR_PTR by IN_UCHARS(1) regardless of the UTF setting. */
+  case OP_ANYBYTE:
+  detect_partial_match(common, backtracks);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  return cc;
+  /* A single property test is wrapped into a small XCLASS-style operand
+     list (XCL_HASPROP, XCL_PROP or XCL_NOTPROP, the two operand units,
+     XCL_END) and handed to compile_xclass_matchingpath(). */
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+  case OP_NOTPROP:
+  case OP_PROP:
+  propdata[0] = XCL_HASPROP;
+  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
+  propdata[2] = cc[0];
+  propdata[3] = cc[1];
+  propdata[4] = XCL_END;
+  compile_xclass_matchingpath(common, propdata, backtracks);
+  return cc + 2;
+#endif
+#endif
+  /* A CR directly followed by NL is consumed as one item; a CR that is
+     followed by something else, or that ends the subject, is accepted on
+     its own. Any other character is passed to check_newlinechar(). */
+  case OP_ANYNL:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
+  jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+  /* We don't need to handle soft partial matching case. */
+  end_list = NULL;
+  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
+    add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  else
+    check_str_end(common, &end_list);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+  jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  jump[2] = JUMP(SLJIT_JUMP);
+  JUMPHERE(jump[0]);
+  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
+  set_jumps(end_list, LABEL());
+  JUMPHERE(jump[1]);
+  JUMPHERE(jump[2]);
+  return cc;
+  /* Horizontal and vertical space: a character is read, the routine
+     collected in common->hspace / common->vspace is fast-called, and the
+     flag state it leaves behind decides whether to backtrack. */
+  case OP_NOT_HSPACE:
+  case OP_HSPACE:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
+  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
+  return cc;
+
+  case OP_NOT_VSPACE:
+  case OP_VSPACE:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
+  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
+  return cc;
+  /* OP_EXTUNI consumes one character and then keeps consuming characters
+     while the ucp_gbtable entry selected by the previous character's
+     gbprop has the bit (1 << gbprop of the current character) set. The
+     first character that fails the test is not consumed: STR_PTR is reset
+     to the position saved in TMP3. STACK_TOP is borrowed to hold the
+     previous gbprop and is restored from LOCALS0 afterwards. */
+#ifdef SUPPORT_UCP
+  case OP_EXTUNI:
+  detect_partial_match(common, backtracks);
+  read_char(common);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
+  /* Optimize register allocation: use a real register. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+
+  label = LABEL();
+  jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
+  read_char(common);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+
+  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
+  OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
+  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+  JUMPTO(SLJIT_C_NOT_ZERO, label);
+
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+  JUMPHERE(jump[0]);
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+
+  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
+    {
+    jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
+    /* Since we successfully read a char above, partial matching must occur. */
+    check_partial(common, TRUE);
+    JUMPHERE(jump[0]);
+    }
+  return cc;
+#endif
+  /* Succeeds at the end of the subject, or directly before a newline that
+     is the last thing in the subject. */
+  case OP_EODN:
+  /* Requires rather complex checks. */
+  jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    if (common->mode == JIT_COMPILE)
+      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
+    else
+      {
+      jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
+      add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
+      check_partial(common, TRUE);
+      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+      JUMPHERE(jump[1]);
+      }
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+    }
+  else if (common->nltype == NLTYPE_FIXED)
+    {
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
+    }
+  else
+    {
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+    jump[2] = JUMP(SLJIT_C_GREATER);
+    add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
+    /* Equal. */
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+
+    JUMPHERE(jump[1]);
+    if (common->nltype == NLTYPE_ANYCRLF)
+      {
+      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
+      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+      }
+    else
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
+      read_char_range(common, common->nlmin, common->nlmax, TRUE);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+      add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
+      }
+    JUMPHERE(jump[2]);
+    JUMPHERE(jump[3]);
+    }
+  JUMPHERE(jump[0]);
+  check_partial(common, FALSE);
+  return cc;
+  /* Backtrack while STR_PTR is still below STR_END. */
+  case OP_EOD:
+  add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
+  check_partial(common, FALSE);
+  return cc;
+  /* Backtrack when STR_PTR is beyond jit_arguments.begin or when the
+     notbol flag of the arguments is set. */
+  case OP_CIRC:
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+  return cc;
+  /* When STR_PTR is not beyond begin only the notbol flag decides.
+     Otherwise STR_PTR must be below STR_END and the character(s) just
+     before it must form a newline. */
+  case OP_CIRCM:
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+  jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+  jump[0] = JUMP(SLJIT_JUMP);
+  JUMPHERE(jump[1]);
+
+  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+    }
+  else
+    {
+    skip_char_back(common);
+    read_char_range(common, common->nlmin, common->nlmax, TRUE);
+    check_newlinechar(common, common->nltype, backtracks, FALSE);
+    }
+  JUMPHERE(jump[0]);
+  return cc;
+  /* Backtracks when the noteol flag is set. Otherwise the OP_EODN code is
+     emitted, or, when common->endonly is set, the same test as OP_EOD. */
+  case OP_DOLL:
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+
+  if (!common->endonly)
+    compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
+  else
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
+    check_partial(common, FALSE);
+    }
+  return cc;
+  /* At the end of the subject only the noteol flag decides. Elsewhere the
+     character(s) at STR_PTR must form a newline; STR_PTR is not moved by
+     the visible code of this case. */
+  case OP_DOLLM:
+  jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+  check_partial(common, FALSE);
+  jump[0] = JUMP(SLJIT_JUMP);
+  JUMPHERE(jump[1]);
+
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    if (common->mode == JIT_COMPILE)
+      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
+    else
+      {
+      jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
+      /* STR_PTR = STR_END - IN_UCHARS(1) */
+      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+      check_partial(common, TRUE);
+      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+      JUMPHERE(jump[1]);
+      }
+
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+    }
+  else
+    {
+    peek_char(common, common->nlmax);
+    check_newlinechar(common, common->nltype, backtracks, FALSE);
+    }
+  JUMPHERE(jump[0]);
+  return cc;
+  /* Literal character. When common->mode is JIT_COMPILE and the item is
+     caseful, has no other case, or char_get_othercase_bit() is non-zero,
+     the comparison is delegated to byte_sequence_compare(). Otherwise the
+     character is read and compared with c (and its other case oc). */
+  case OP_CHAR:
+  case OP_CHARI:
+  length = 1;
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
+#endif
+  if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
+    {
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
+
+    context.length = IN_UCHARS(length);
+    context.sourcereg = -1;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+    context.ucharptr = 0;
+#endif
+    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
+    }
+
+  detect_partial_match(common, backtracks);
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+    GETCHAR(c, cc);
+    }
+  else
+#endif
+    c = *cc;
+
+  if (type == OP_CHAR || !char_has_othercase(common, cc))
+    {
+    read_char_range(common, c, c, FALSE);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+    return cc + length;
+    }
+  oc = char_othercase(common, c);
+  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
+  bit = c ^ oc;
+  if (is_powerof2(bit)) /* The cases differ in one bit: OR it in and compare once. */
+    {
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
+    return cc + length;
+    }
+  jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
+  JUMPHERE(jump[0]);
+  return cc + length;
+  /* Any character except c (and, for OP_NOTI, its other case). */
+  case OP_NOT:
+  case OP_NOTI:
+  detect_partial_match(common, backtracks);
+  length = 1;
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+#ifdef COMPILE_PCRE8
+    c = *cc;
+    if (c < 128)
+      {
+      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+      if (type == OP_NOT || !char_has_othercase(common, cc))
+        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
+      else
+        {
+        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
+        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
+        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
+        }
+      /* Skip the variable-length character. */
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+      jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+      JUMPHERE(jump[0]);
+      return cc + 1;
+      }
+    else
+#endif /* COMPILE_PCRE8 */
+      {
+      GETCHARLEN(c, cc, length);
+      }
+    }
+  else
+#endif /* SUPPORT_UTF */
+    c = *cc;
+
+  if (type == OP_NOT || !char_has_othercase(common, cc))
+    {
+    read_char_range(common, c, c, TRUE);
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
+    }
+  else
+    {
+    oc = char_othercase(common, c);
+    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
+    bit = c ^ oc;
+    if (is_powerof2(bit))
+      {
+      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
+      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
+      }
+    else
+      {
+      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
+      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
+      }
+    }
+  return cc + length;
+  /* Bitmap class. When check_class_ranges() returns non-zero nothing more
+     is emitted here. Otherwise bit (ch & 0x7) of byte (ch >> 3) of the
+     32-byte map at cc is tested and a clear bit backtracks. Characters
+     above the map limit backtrack for OP_CLASS and skip the bitmap test
+     for OP_NCLASS. */
+  case OP_CLASS:
+  case OP_NCLASS:
+  detect_partial_match(common, backtracks);
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
+  read_char_range(common, 0, bit, type == OP_NCLASS);
+#else
+  read_char_range(common, 0, 255, type == OP_NCLASS);
+#endif
+
+  if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
+    return cc + 32 / sizeof(pcre_uchar);
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  jump[0] = NULL;
+  if (common->utf)
+    {
+    jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
+    if (type == OP_CLASS)
+      {
+      add_jump(compiler, backtracks, jump[0]);
+      jump[0] = NULL;
+      }
+    }
+#elif !defined COMPILE_PCRE8
+  jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+  if (type == OP_CLASS)
+    {
+    add_jump(compiler, backtracks, jump[0]);
+    jump[0] = NULL;
+    }
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
+  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+  add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+  if (jump[0] != NULL)
+    JUMPHERE(jump[0]);
+#endif
+
+  return cc + 32 / sizeof(pcre_uchar);
+  /* Extended class: the item data after the LINK_SIZE field is compiled by
+     compile_xclass_matchingpath(); GET(cc, 0) is used to step over it. */
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  case OP_XCLASS:
+  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
+  return cc + GET(cc, 0) - 1;
+#endif
+  /* Moves STR_PTR back by GET(cc, 0) characters and backtracks when that
+     would pass jit_arguments.begin. In UTF mode the step is taken one
+     character at a time with skip_char_back(). */
+  case OP_REVERSE:
+  length = GET(cc, 0);
+  if (length == 0)
+    return cc + LINK_SIZE;
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
+    label = LABEL();
+    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
+    skip_char_back(common);
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_C_NOT_ZERO, label);
+    }
+  else
+#endif
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
+    }
+  check_start_used_ptr(common);
+  return cc + LINK_SIZE;
+  }
+SLJIT_ASSERT_STOP(); /* No case above matched the opcode. */
+return cc;
+}
+
+static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
+{
+/* Compiles the item at cc and always consumes at least one input character.
+   A run of OP_CHAR / OP_CHARI items whose length is fixed is merged so that
+   a single subject-length check covers the whole run. Returns the position
+   after the last opcode that was compiled. */
+DEFINE_COMPILER;
+compare_context context;
+pcre_uchar *scan = cc;
+int units;
+
+/* First pass: add up the code units of the leading fixed-length run. */
+context.length = 0;
+while (scan < ccend)
+  {
+  units = 0;
+  if (*scan == OP_CHAR || *scan == OP_CHARI)
+    {
+    /* A caseless character whose other case has no othercase bit cannot
+       be part of the run; it keeps units == 0 and ends the scan. */
+    if (*scan == OP_CHAR || !char_has_othercase(common, scan + 1) || char_get_othercase_bit(common, scan + 1) != 0)
+      {
+      units = 1;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(scan[1]))
+        units += GET_EXTRALEN(scan[1]);
+#endif
+      }
+    }
+
+  scan += 1 + units;
+  context.length += IN_UCHARS(units);
+  if (units == 0 || context.length > 128)
+    break;
+  }
+
+if (context.length <= 0)
+  {
+  /* Nothing could be merged: the single item is compiled on its own. */
+  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
+  }
+
+/* Second pass: one length check for the run, then compare it piece by piece. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
+add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
+
+context.sourcereg = -1;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+context.ucharptr = 0;
+#endif
+while (context.length > 0)
+  cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
+return cc;
+}
+
+/* Forward declarations of the two recursive code generators which are used by
+the functions below. */
+static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
+static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
+/* PUSH_BACKTRACK allocates `size` bytes with sljit_alloc_memory(), clears them,
+   stores `ccstart` in the new record and pushes it onto parent->top. The local
+   names `compiler`, `backtrack` and `parent` must be in scope where the macro
+   is used. If the compiler reports an error after the allocation, the
+   enclosing function returns `error`. */
+#define PUSH_BACKTRACK(size, ccstart, error) \
+  do \
+    { \
+    backtrack = sljit_alloc_memory(compiler, (size)); \
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
+      return error; \
+    memset(backtrack, 0, size); \
+    backtrack->prev = parent->top; \
+    backtrack->cc = (ccstart); \
+    parent->top = backtrack; \
+    } \
+  while (0)
+/* Same as PUSH_BACKTRACK, for functions returning void: on a compiler error
+   the enclosing function simply returns. */
+#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
+  do \
+    { \
+    backtrack = sljit_alloc_memory(compiler, (size)); \
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
+      return; \
+    memset(backtrack, 0, size); \
+    backtrack->prev = parent->top; \
+    backtrack->cc = (ccstart); \
+    parent->top = backtrack; \
+    } \
+  while (0)
+/* Views the local `backtrack` pointer as a pointer to the given record type. */
+#define BACKTRACK_AS(type) ((type *)backtrack)
+
+static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
+{
+/* Emits the search among the name table entries of an OP_DNREF / OP_DNREFI
+item. The entries are tested in table order and the search stops at the first
+one whose OVECTOR start value differs from OVECTOR(1); when none differs, the
+last entry remains selected.
+
+The address of the selected OVECTOR slot goes to TMP2 (TMP1 is overwritten
+with the value of OVECTOR(1)). When backtracks is not NULL and jscript_compat
+is off, a jump is added to *backtracks for the case where the last entry is
+equal to OVECTOR(1) as well. */
+DEFINE_COMPILER;
+int remaining;
+pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
+unsigned int offset;
+jump_list *found = NULL;
+
+SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+
+/* All entries except the last one: leave the search as soon as the start
+value is not equal to OVECTOR(1). */
+for (remaining = GET2(cc, 1 + IMM2_SIZE) - 1; remaining > 0; remaining--)
+  {
+  offset = GET2(entry, 0) << 1;
+  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
+  add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
+  entry += common->name_entry_size;
+  }
+
+/* The last entry is selected unconditionally. */
+offset = GET2(entry, 0) << 1;
+GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
+if (backtracks != NULL && !common->jscript_compat)
+  add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
+
+set_jumps(found, LABEL());
+}
+
+static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
+{
+DEFINE_COMPILER;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
+int offset = 0;
+struct sljit_jump *jump = NULL;
+struct sljit_jump *partial;
+struct sljit_jump *nopartial;
+
+if (ref)
+  {
+  offset = GET2(cc, 1) << 1;
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+  /* OVECTOR(1) contains the "string begin - 1" constant. */
+  if (withchecks && !common->jscript_compat)
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+  }
+else
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
+if (common->utf && *cc == OP_REFI)
+  {
+  SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
+  if (ref)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+  else
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+
+  if (withchecks)
+    jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
+
+  /* Needed to save important temporary registers. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
+  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+  if (common->mode == JIT_COMPILE)
+    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
+  else
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
+    nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
+    check_partial(common, FALSE);
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+    JUMPHERE(nopartial);
+    }
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
+  }
+else
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
+  {
+  if (ref)
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
+  else
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+
+  if (withchecks)
+    jump = JUMP(SLJIT_C_ZERO);
+
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+  partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
+  if (common->mode == JIT_COMPILE)
+    add_jump(compiler, backtracks, partial);
+
+  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+
+  if (common->mode != JIT_COMPILE)
+    {
+    nopartial = JUMP(SLJIT_JUMP);
+    JUMPHERE(partial);
+    /* TMP2 -= STR_END - STR_PTR */
+    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
+    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
+    partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
+    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
+    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+    JUMPHERE(partial);
+    check_partial(common, FALSE);
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+    JUMPHERE(nopartial);
+    }
+  }
+
+if (jump != NULL)
+  {
+  if (emptyfail)
+    add_jump(compiler, backtracks, jump);
+  else
+    JUMPHERE(jump);
+  }
+}
+/* Emits the matching path of a back reference which is followed by a repeat
+   item (OP_CRSTAR ... OP_CRMINRANGE). cc points to the reference opcode: the
+   group number is read from cc for OP_REF / OP_REFI, otherwise the OVECTOR
+   pair is located with compile_dnref_search(). An iterator_backtrack record
+   is pushed onto parent. Returns the address following the repeat item, or
+   NULL when the allocation of the backtrack record fails.
+
+   min and max hold the repeat limits; max == 0 stands for "no upper limit"
+   (OP_CRSTAR / OP_CRPLUS). Repeat opcodes with an odd value are the
+   minimizing variants (see the compile time assert below). */
+static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
+backtrack_common *backtrack;
+pcre_uchar type;
+int offset = 0;
+struct sljit_label *label;
+struct sljit_jump *zerolength;
+struct sljit_jump *jump = NULL;
+pcre_uchar *ccbegin = cc;
+int min = 0, max = 0;
+BOOL minimize;
+
+PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+
+if (ref)
+  offset = GET2(cc, 1) << 1;
+else
+  cc += IMM2_SIZE;
+type = cc[1 + IMM2_SIZE];
+
+SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
+minimize = (type & 0x1) != 0;
+switch(type)
+  {
+  case OP_CRSTAR:
+  case OP_CRMINSTAR:
+  min = 0;
+  max = 0;
+  cc += 1 + IMM2_SIZE + 1;
+  break;
+  case OP_CRPLUS:
+  case OP_CRMINPLUS:
+  min = 1;
+  max = 0;
+  cc += 1 + IMM2_SIZE + 1;
+  break;
+  case OP_CRQUERY:
+  case OP_CRMINQUERY:
+  min = 0;
+  max = 1;
+  cc += 1 + IMM2_SIZE + 1;
+  break;
+  case OP_CRRANGE:
+  case OP_CRMINRANGE:
+  min = GET2(cc, 1 + IMM2_SIZE + 1);
+  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
+  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
+  break;
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+
+if (!minimize)
+  {
+  if (min == 0)
+    {
+    allocate_stack(common, 2);
+    if (ref)
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
+    /* Temporary release of STR_PTR. */
+    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+    /* Handles both invalid and empty cases. Since the minimum repeat,
+    is zero the invalid case is basically the same as an empty case. */
+    if (ref)
+      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    else
+      {
+      compile_dnref_search(common, ccbegin, NULL);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
+      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+      }
+    /* Restore if not zero length. */
+    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+    }
+  else
+    {
+    allocate_stack(common, 1);
+    if (ref)
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+    if (ref)
+      {
+      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+      }
+    else
+      {
+      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
+      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+      }
+    }
+
+  if (min > 1 || max > 1)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
+
+  label = LABEL();
+  if (!ref)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
+  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
+
+  if (min > 1 || max > 1)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
+    if (min > 1)
+      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
+    if (max > 1)
+      {
+      jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
+      allocate_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      JUMPTO(SLJIT_JUMP, label);
+      JUMPHERE(jump);
+      }
+    }
+
+  if (max == 0)
+    {
+    /* Includes min > 1 case as well. */
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    JUMPTO(SLJIT_JUMP, label);
+    }
+
+  JUMPHERE(zerolength);
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+
+  count_match(common);
+  return cc;
+  }
+
+allocate_stack(common, ref ? 2 : 3);
+if (ref)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+if (type != OP_CRMINSTAR)
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
+
+if (min == 0)
+  {
+  /* Handles both invalid and empty cases. Since the minimum repeat,
+  is zero the invalid case is basically the same as an empty case. */
+  if (ref)
+    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+  else
+    {
+    compile_dnref_search(common, ccbegin, NULL);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
+    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+    }
+  /* Length is non-zero, we can match real repeats. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  jump = JUMP(SLJIT_JUMP);
+  }
+else
+  {
+  if (ref)
+    {
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    }
+  else
+    {
+    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
+    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+    }
+  }
+
+BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+if (max > 0)
+  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
+
+if (!ref)
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+
+if (min > 1)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
+  }
+else if (max > 0)
+  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
+
+if (jump != NULL)
+  JUMPHERE(jump);
+JUMPHERE(zerolength);
+
+count_match(common);
+return cc;
+}
+
+static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+/* Emits the matching path of a recursion item located at cc. A
+recurse_backtrack record is pushed onto parent. The called pattern is either
+compiled in place (when get_framesize() reports no_stack for it) or called
+through a recurse_entry which is shared by all calls of the same start
+offset. Returns the address following the item, or NULL when a memory
+allocation fails. */
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+recurse_entry *entry = common->entries;
+recurse_entry *last = NULL;
+sljit_sw start = GET(cc, 1);
+pcre_uchar *target;
+BOOL needs_control_head;
+
+PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
+
+/* Inlining simple patterns. */
+if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
+  {
+  target = common->start + start;
+  compile_matchingpath(common, next_opcode(common, target), bracketend(target) - (1 + LINK_SIZE), backtrack);
+  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
+  return cc + 1 + LINK_SIZE;
+  }
+
+/* Look for an existing entry of this start offset; `last` remembers the
+tail of the list for the append below. */
+for (; entry != NULL; entry = entry->next)
+  {
+  if (entry->start == start)
+    break;
+  last = entry;
+  }
+
+if (entry == NULL)
+  {
+  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return NULL;
+  entry->start = start;
+  entry->calls = NULL;
+  entry->entry = NULL;
+  entry->next = NULL;
+
+  if (last == NULL)
+    common->entries = entry;
+  else
+    last->next = entry;
+  }
+
+/* The value of OVECTOR(0) and / or the mark pointer is pushed onto the
+stack before the call, depending on which of them is in use. */
+if (common->has_set_som && common->mark_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+  allocate_stack(common, 2);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+  }
+else if (common->has_set_som || common->mark_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  }
+
+/* Call the entry directly when its label is already known, otherwise
+record the call in the list kept by the entry. */
+if (entry->entry != NULL)
+  JUMPTO(SLJIT_FAST_CALL, entry->entry);
+else
+  add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
+/* Leave if the match is failed. */
+add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
+return cc + 1 + LINK_SIZE;
+}
+
+static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
+{
+/* Completes the callout block which was partly filled by the generated code
+and invokes the user callout function. Returns 0 when no callout function is
+installed, otherwise the value returned by the callout function.
+
+On entry the subject and offset_vector fields of the block hold subject
+pointers; they are converted to offsets before the fields receive their final
+values. */
+const pcre_uchar *begin = arguments->begin;
+int *offset_vector = arguments->offsets;
+int offset_count = arguments->offset_count;
+int last_set = 0;
+int i;
+
+if (PUBL(callout) == NULL)
+  return 0;
+
+callout_block->version = 2;
+callout_block->callout_data = arguments->callout_data;
+
+/* Offsets in subject. These must be computed before the subject and
+offset_vector fields are overwritten below. */
+callout_block->subject_length = arguments->end - arguments->begin;
+callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
+callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
+#if defined COMPILE_PCRE8
+callout_block->subject = (PCRE_SPTR)begin;
+#elif defined COMPILE_PCRE16
+callout_block->subject = (PCRE_SPTR16)begin;
+#elif defined COMPILE_PCRE32
+callout_block->subject = (PCRE_SPTR32)begin;
+#endif
+callout_block->offset_vector = offset_vector;
+
+/* Convert and copy the JIT offset vector to the offset_vector array. The
+index of the last pair whose start pointer is not below begin is kept for
+capture_top. */
+for (i = 2; i < offset_count; i += 2)
+  {
+  offset_vector[i] = jit_ovector[i] - begin;
+  offset_vector[i + 1] = jit_ovector[i + 1] - begin;
+  if (jit_ovector[i] >= begin)
+    last_set = i;
+  }
+callout_block->capture_top = (last_set >> 1) + 1;
+
+/* The first pair is always reported as -1. */
+if (offset_count > 0)
+  {
+  offset_vector[0] = -1;
+  if (offset_count > 1)
+    offset_vector[1] = -1;
+  }
+return (*PUBL(callout))(callout_block);
+}
+
+/* Size of the callout block, rounded up to a multiple of 8 bytes. */
+#define CALLOUT_ARG_SIZE \
+    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
+/* Displacement of the field `arg` of the callout block; it is used below
+   relative to STACK_TOP after the block has been allocated on the stack. */
+#define CALLOUT_ARG_OFFSET(arg) \
+    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
+/* Emits the matching path of a callout item located at cc: a callout block is
+   allocated on the stack, partly filled, passed to do_callout() and released
+   again. A positive return value adds a jump to the backtrack list, a
+   negative one jumps to the forced quit path, and zero continues matching.
+   Returns the address following the item, or NULL when the allocation of the
+   backtrack record fails. */
+static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+SLJIT_ASSERT(common->capture_last_ptr != 0);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
+
+/* These pointer sized fields temporarily store internal variables: STR_PTR
+goes to offset_vector and the value of OVECTOR(0) goes to subject. */
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
+
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
+
+/* Needed to save important temporary registers. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
+GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
+sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
+OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+
+/* Check return value: greater than zero backtracks, less than zero quits. */
+OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
+if (common->forced_quit_label == NULL)
+  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
+else
+  JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
+return cc + 2 + 2 * LINK_SIZE;
+}
+
+#undef CALLOUT_ARG_SIZE
+#undef CALLOUT_ARG_OFFSET
+/* Emits the matching path of an assertion (opcode in the range OP_ASSERT ...
+   OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO / OP_BRAMINZERO
+   when conditional is FALSE. Every alternative is compiled together with its
+   own backtracking path. When conditional is TRUE the failure jumps are
+   collected in backtrack->condfailed, otherwise in
+   backtrack->common.topbacktracks.
+
+   The accept / quit / then_trap related members of common are saved on entry
+   and restored before every return. Returns the address following the end of
+   the assertion, or NULL when a compiler error is detected after compiling
+   an alternative. */
+static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
+{
+DEFINE_COMPILER;
+int framesize;
+int extrasize;
+BOOL needs_control_head;
+int private_data_ptr;
+backtrack_common altbacktrack;
+pcre_uchar *ccbegin;
+pcre_uchar opcode;
+pcre_uchar bra = OP_BRA;
+jump_list *tmp = NULL;
+jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
+jump_list **found;
+/* Saving previous accept variables. */
+BOOL save_local_exit = common->local_exit;
+BOOL save_positive_assert = common->positive_assert;
+then_trap_backtrack *save_then_trap = common->then_trap;
+struct sljit_label *save_quit_label = common->quit_label;
+struct sljit_label *save_accept_label = common->accept_label;
+jump_list *save_quit = common->quit;
+jump_list *save_positive_assert_quit = common->positive_assert_quit;
+jump_list *save_accept = common->accept;
+struct sljit_jump *jump;
+struct sljit_jump *brajump = NULL;
+
+/* Assert captures then. */
+common->then_trap = NULL;
+
+if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
+  {
+  SLJIT_ASSERT(!conditional);
+  bra = *cc;
+  cc++;
+  }
+private_data_ptr = PRIVATE_DATA(cc);
+SLJIT_ASSERT(private_data_ptr != 0);
+framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
+backtrack->framesize = framesize;
+backtrack->private_data_ptr = private_data_ptr;
+opcode = *cc;
+SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
+found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
+ccbegin = cc;
+cc += GET(cc, 1);
+
+if (bra == OP_BRAMINZERO)
+  {
+  /* This is a braminzero backtrack path. */
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+  }
+
+if (framesize < 0)
+  {
+  extrasize = needs_control_head ? 2 : 1;
+  if (framesize == no_frame)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+  allocate_stack(common, extrasize);
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+    }
+  }
+else
+  {
+  extrasize = needs_control_head ? 3 : 2;
+  allocate_stack(common, framesize + extrasize);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+    }
+  else
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
+  }
+
+memset(&altbacktrack, 0, sizeof(backtrack_common));
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+  {
+  /* Negative assert is stronger than positive assert. */
+  common->local_exit = TRUE;
+  common->quit_label = NULL;
+  common->quit = NULL;
+  common->positive_assert = FALSE;
+  }
+else
+  common->positive_assert = TRUE;
+common->positive_assert_quit = NULL;
+
+while (1)
+  {
+  common->accept_label = NULL;
+  common->accept = NULL;
+  altbacktrack.top = NULL;
+  altbacktrack.topbacktracks = NULL;
+
+  if (*ccbegin == OP_ALT)
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  altbacktrack.cc = ccbegin;
+  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    {
+    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+      {
+      common->local_exit = save_local_exit;
+      common->quit_label = save_quit_label;
+      common->quit = save_quit;
+      }
+    common->positive_assert = save_positive_assert;
+    common->then_trap = save_then_trap;
+    common->accept_label = save_accept_label;
+    common->positive_assert_quit = save_positive_assert_quit;
+    common->accept = save_accept;
+    return NULL;
+    }
+  common->accept_label = LABEL();
+  if (common->accept != NULL)
+    set_jumps(common->accept, common->accept_label);
+
+  /* Reset stack. */
+  if (framesize < 0)
+    {
+    if (framesize == no_frame)
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    else
+      free_stack(common, extrasize);
+    if (needs_control_head)
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+    }
+  else
+    {
+    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
+      {
+      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+      if (needs_control_head)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      if (needs_control_head)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      }
+    }
+
+  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+    {
+    /* We know that STR_PTR was stored on the top of the stack. */
+    if (conditional)
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
+    else if (bra == OP_BRAZERO)
+      {
+      if (framesize < 0)
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+      else
+        {
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+        }
+      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else if (framesize >= 0)
+      {
+      /* For OP_BRA and OP_BRAMINZERO. */
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+      }
+    }
+  add_jump(compiler, found, JUMP(SLJIT_JUMP));
+
+  compile_backtrackingpath(common, altbacktrack.top);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    {
+    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+      {
+      common->local_exit = save_local_exit;
+      common->quit_label = save_quit_label;
+      common->quit = save_quit;
+      }
+    common->positive_assert = save_positive_assert;
+    common->then_trap = save_then_trap;
+    common->accept_label = save_accept_label;
+    common->positive_assert_quit = save_positive_assert_quit;
+    common->accept = save_accept;
+    return NULL;
+    }
+  set_jumps(altbacktrack.topbacktracks, LABEL());
+
+  if (*cc != OP_ALT)
+    break;
+
+  ccbegin = cc;
+  cc += GET(cc, 1);
+  }
+
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+  {
+  SLJIT_ASSERT(common->positive_assert_quit == NULL);
+  /* Makes the check less complicated below. */
+  common->positive_assert_quit = common->quit;
+  }
+
+/* None of them matched. */
+if (common->positive_assert_quit != NULL)
+  {
+  jump = JUMP(SLJIT_JUMP);
+  set_jumps(common->positive_assert_quit, LABEL());
+  SLJIT_ASSERT(framesize != no_stack);
+  if (framesize < 0)
+    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
+  else
+    {
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
+    }
+  JUMPHERE(jump);
+  }
+
+if (needs_control_head)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
+
+if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
+  {
+  /* Assert is failed. */
+  if (conditional || bra == OP_BRAZERO)
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  if (framesize < 0)
+    {
+    /* The topmost item should be 0. */
+    if (bra == OP_BRAZERO)
+      {
+      if (extrasize == 2)
+        free_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, extrasize);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
+    /* The topmost item should be 0. */
+    if (bra == OP_BRAZERO)
+      {
+      free_stack(common, framesize + extrasize - 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, framesize + extrasize);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    }
+  jump = JUMP(SLJIT_JUMP);
+  if (bra != OP_BRAZERO)
+    add_jump(compiler, target, jump);
+
+  /* Assert is successful. */
+  set_jumps(tmp, LABEL());
+  if (framesize < 0)
+    {
+    /* We know that STR_PTR was stored on the top of the stack. */
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+    /* Keep the STR_PTR on the top of the stack. */
+    if (bra == OP_BRAZERO)
+      {
+      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+      if (extrasize == 2)
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      }
+    else if (bra == OP_BRAMINZERO)
+      {
+      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    }
+  else
+    {
+    if (bra == OP_BRA)
+      {
+      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
+      }
+    else
+      {
+      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
+      if (extrasize == 2)
+        {
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+        if (bra == OP_BRAMINZERO)
+          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+        }
+      else
+        {
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
+        }
+      }
+    }
+
+  if (bra == OP_BRAZERO)
+    {
+    backtrack->matchingpath = LABEL();
+    SET_LABEL(jump, backtrack->matchingpath);
+    }
+  else if (bra == OP_BRAMINZERO)
+    {
+    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
+    JUMPHERE(brajump);
+    if (framesize >= 0)
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+      }
+    set_jumps(backtrack->common.topbacktracks, LABEL());
+    }
+  }
+else
+  {
+  /* AssertNot is successful. */
+  if (framesize < 0)
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    if (bra != OP_BRA)
+      {
+      if (extrasize == 2)
+        free_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, extrasize);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
+    /* The topmost item should be 0. */
+    if (bra != OP_BRA)
+      {
+      free_stack(common, framesize + extrasize - 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, framesize + extrasize);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    }
+
+  if (bra == OP_BRAZERO)
+    backtrack->matchingpath = LABEL();
+  else if (bra == OP_BRAMINZERO)
+    {
+    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
+    JUMPHERE(brajump);
+    }
+
+  if (bra != OP_BRA)
+    {
+    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
+    set_jumps(backtrack->common.topbacktracks, LABEL());
+    backtrack->common.topbacktracks = NULL;
+    }
+  }
+
+/* Restore the state of the enclosing pattern which was saved on entry. */
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+  {
+  common->local_exit = save_local_exit;
+  common->quit_label = save_quit_label;
+  common->quit = save_quit;
+  }
+common->positive_assert = save_positive_assert;
+common->then_trap = save_then_trap;
+common->accept_label = save_accept_label;
+common->positive_assert_quit = save_positive_assert_quit;
+common->accept = save_accept;
+return cc + 1 + LINK_SIZE;
+}
+
+/* Emits the code that runs when the body of an OP_ONCE group has matched:
+   STACK_TOP is moved back to the slots reserved when the group was entered,
+   and, when requested, the saved control head is reloaded.
+
+   ket                - closing opcode of the group (OP_KET, OP_KETRMAX, OP_KETRMIN).
+   framesize          - frame size of the group; a negative value means no
+                        frame was saved, and no_frame is handled separately.
+   private_data_ptr   - local slot that belongs to the group.
+   has_alternatives   - non-zero when the group has alternatives.
+   needs_control_head - non-zero when the control head was saved on entry. */
+static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
+{
+DEFINE_COMPILER;
+/* Non-zero when a STR_PTR slot was pushed for this group on entry. */
+const BOOL has_str_slot = (ket != OP_KET || has_alternatives);
+
+if (framesize >= 0)
+  {
+  /* A frame was saved: recompute STACK_TOP from the stored frame position. */
+  OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + (has_str_slot ? 2 : 1)) * sizeof(sljit_sw));
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
+
+  /* The caller's OP_KETRMAX handling reads TMP2 afterwards. */
+  if (ket == OP_KETRMAX)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  }
+else
+  {
+  if (framesize != no_frame)
+    free_stack(common, (needs_control_head ? 1 : 0) + (has_str_slot ? 1 : 0));
+  else
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), has_str_slot ? sizeof(sljit_sw) : 0);
+
+  switch (ket)
+    {
+    case OP_KETRMAX:
+    /* The caller's OP_KETRMAX handling reads TMP2 afterwards. */
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
+    break;
+
+    case OP_KETRMIN:
+    /* Copy the stored STR_PTR into the group's private slot. */
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
+    break;
+
+    default:
+    break;
+    }
+  }
+
+/* TMP1 was loaded with the saved control head in whichever branch ran. */
+if (needs_control_head)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
+}
+
+/* Emits the code executed when a capturing group closes: the previous
+   capture state is stored on the stack starting at index stacksize, and the
+   new boundaries are written to the output vector.
+
+   stacksize        - first stack index this function may write.
+   offset           - capture offset of the group (offset >> 1 indexes
+                      optimized_cbracket and is stored at capture_last_ptr).
+   private_data_ptr - local slot holding the value that becomes OVECTOR(offset).
+
+   Returns the first stack index that was not written. */
+static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
+{
+DEFINE_COMPILER;
+const int group = offset >> 1;
+int next_slot = stacksize;
+
+if (common->capture_last_ptr != 0)
+  {
+  /* Save the old value kept at capture_last_ptr, then record this group. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, group);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
+  next_slot += 1;
+  }
+
+/* Nothing more is saved here for optimized brackets. */
+if (common->optimized_cbracket[group] != 0)
+  return next_slot;
+
+/* Save both old OVECTOR entries, then store the new start (taken from the
+   private slot) and the new end (the current STR_PTR). */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot + 1), TMP2, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+return next_slot + 2;
+}
+
+/*
+  Handling bracketed expressions is probably the most complex part.
+
+  Stack layout naming characters:
+    S - Push the current STR_PTR
+    0 - Push a 0 (NULL)
+    A - Push the current STR_PTR. Needed for restoring the STR_PTR
+        before the next alternative. Not pushed if there are no alternatives.
+    M - Any values pushed by the current alternative. Can be empty, or anything.
+    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
+    L - Push the previous local (pointed by localptr) to the stack
+   () - optional values stored on the stack
+  ()* - optional, can be stored multiple times
+
+  The following list shows the regular expression templates, their PCRE byte codes
+  and stack layout supported by pcre-sljit.
+
+  (?:)                     OP_BRA     | OP_KET                A M
+  ()                       OP_CBRA    | OP_KET                C M
+  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
+                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
+  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
+                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
+  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
+                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
+  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
+                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
+  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
+  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
+  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
+  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
+  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
+           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
+  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
+           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
+  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
+           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
+  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
+           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
+
+
+  Stack layout naming characters:
+    A - Push the alternative index (starting from 0) on the stack.
+        Not pushed if there are no alternatives.
+    M - Any values pushed by the current alternative. Can be empty, or anything.
+
+  The next list shows the possible content of a bracket:
+  (|)     OP_*BRA    | OP_ALT ...         M A
+  (?()|)  OP_*COND   | OP_ALT             M A
+  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
+  (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
+                                          Or nothing, if trace is unnecessary
+*/
+
+static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{ /* Emits the matching-path code for one bracketed group (OP_BRA, OP_CBRA,
+   OP_SBRA, OP_SCBRA, OP_ONCE, OP_ONCE_NC, OP_COND, OP_SCOND), optionally
+   preceded by OP_BRAZERO or OP_BRAMINZERO. The stack layouts used are the
+   ones listed in the comment in front of this function.
+
+   common - compiler state shared by the code generators.
+   cc     - points at OP_BRAZERO / OP_BRAMINZERO, or directly at the bracket
+            opcode.
+   parent - backtrack node of the enclosing construct. The bracket_backtrack
+            created by PUSH_BACKTRACK is reachable through parent->top (see
+            the OP_DEF early exit). For OP_BRAMINZERO it is also cast to
+            braminzero_backtrack near the end of the function.
+
+   Returns the byte code position after the group, including repeat_length
+   extra units when a counted repeat is attached, or NULL when sljit reports
+   a compiler error. */
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode;
+int private_data_ptr = 0;
+int offset = 0; /* Non-zero only for capturing groups (see the OP_CBRA / OP_SCBRA case). */
+int i, stacksize;
+int repeat_ptr = 0, repeat_length = 0;
+int repeat_type = 0, repeat_count = 0;
+pcre_uchar *ccbegin;
+pcre_uchar *matchingpath;
+pcre_uchar *slot;
+pcre_uchar bra = OP_BRA; /* Stays OP_BRA when there is no OP_BRAZERO / OP_BRAMINZERO prefix. */
+pcre_uchar ket;
+assert_backtrack *assert;
+BOOL has_alternatives;
+BOOL needs_control_head = FALSE;
+struct sljit_jump *jump;
+struct sljit_jump *skip;
+struct sljit_label *rmax_label = NULL;
+struct sljit_jump *braminzero = NULL;
+
+PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
+
+if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
+  {
+  bra = *cc;
+  cc++;
+  opcode = *cc; /* Redundant: opcode is assigned again right after this block. */
+  }
+
+opcode = *cc;
+ccbegin = cc;
+matchingpath = bracketend(cc) - 1 - LINK_SIZE; /* Temporarily points at the closing ket opcode. */
+ket = *matchingpath;
+if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0) /* A counted repeat is attached to this OP_KET. */
+  {
+  repeat_ptr = PRIVATE_DATA(matchingpath);
+  repeat_length = PRIVATE_DATA(matchingpath + 1);
+  repeat_type = PRIVATE_DATA(matchingpath + 2);
+  repeat_count = PRIVATE_DATA(matchingpath + 3);
+  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
+  if (repeat_type == OP_UPTO) /* OP_UPTO is compiled like a greedy repeat. */
+    ket = OP_KETRMAX;
+  if (repeat_type == OP_MINUPTO) /* OP_MINUPTO is compiled like a lazy repeat. */
+    ket = OP_KETRMIN;
+  }
+
+if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
+  {
+  /* Drop this bracket_backtrack. No code is emitted for the group; the
+     returned position is right after its closing ket and repeat data. */
+  parent->top = backtrack->prev;
+  return matchingpath + 1 + LINK_SIZE + repeat_length;
+  }
+
+matchingpath = ccbegin + 1 + LINK_SIZE; /* From here on: start of the first alternative's byte code. */
+SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
+SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
+cc += GET(cc, 1); /* cc now points at the first OP_ALT or at the closing ket. */
+
+has_alternatives = *cc == OP_ALT;
+if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
+  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
+
+if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) /* Such an OP_COND is compiled as OP_SCOND. */
+  opcode = OP_SCOND;
+if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) /* OP_ONCE_NC shares the OP_ONCE code paths. */
+  opcode = OP_ONCE;
+
+if (opcode == OP_CBRA || opcode == OP_SCBRA)
+  {
+  /* Capturing brackets has a pre-allocated space. For a non-optimized
+     bracket the private slot is OVECTOR_PRIV(n); for an optimized one it is
+     OVECTOR(2n) itself. offset is doubled in both cases. */
+  offset = GET2(ccbegin, 1 + LINK_SIZE);
+  if (common->optimized_cbracket[offset] == 0)
+    {
+    private_data_ptr = OVECTOR_PRIV(offset);
+    offset <<= 1;
+    }
+  else
+    {
+    offset <<= 1;
+    private_data_ptr = OVECTOR(offset);
+    }
+  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
+  matchingpath += IMM2_SIZE;
+  }
+else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
+  {
+  /* Other brackets simply allocate the next entry. */
+  private_data_ptr = PRIVATE_DATA(ccbegin);
+  SLJIT_ASSERT(private_data_ptr != 0);
+  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
+  if (opcode == OP_ONCE)
+    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
+  }
+
+/* Instructions before the first alternative. The slots are counted first,
+   allocated in one step, and then filled in the same order. */
+stacksize = 0;
+if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
+  stacksize++;
+if (bra == OP_BRAZERO)
+  stacksize++;
+
+if (stacksize > 0)
+  allocate_stack(common, stacksize);
+
+stacksize = 0;
+if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+  stacksize++;
+  }
+
+if (bra == OP_BRAZERO)
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+
+if (bra == OP_BRAMINZERO)
+  {
+  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  if (ket != OP_KETRMIN)
+    {
+    free_stack(common, 1);
+    braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+    }
+  else
+    {
+    if (opcode == OP_ONCE || opcode >= OP_SBRA)
+      {
+      jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+      /* Nothing stored during the first run. */
+      skip = JUMP(SLJIT_JUMP);
+      JUMPHERE(jump);
+      /* Checking zero-length iteration. */
+      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
+        {
+        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
+        braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        }
+      else
+        {
+        /* Except when the whole stack frame must be saved. */
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
+        }
+      JUMPHERE(skip);
+      }
+    else
+      {
+      jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+      JUMPHERE(jump);
+      }
+    }
+  }
+
+if (repeat_type != 0) /* Counted repeat: load the counter before the loop label is placed. */
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
+  if (repeat_type == OP_EXACT)
+    rmax_label = LABEL();
+  }
+
+if (ket == OP_KETRMIN)
+  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
+
+if (ket == OP_KETRMAX)
+  {
+  rmax_label = LABEL(); /* Loop head that the greedy repeat jumps back to. */
+  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
+    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
+  }
+
+/* Handling capturing brackets and alternatives. */
+if (opcode == OP_ONCE)
+  {
+  stacksize = 0;
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+    stacksize++;
+    }
+
+  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
+    {
+    /* Neither capturing brackets nor recursions are found in the block. */
+    if (ket == OP_KETRMIN)
+      {
+      stacksize += 2;
+      if (!needs_control_head)
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      }
+    else
+      {
+      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+      if (ket == OP_KETRMAX || has_alternatives)
+        stacksize++;
+      }
+
+    if (stacksize > 0)
+      allocate_stack(common, stacksize);
+
+    stacksize = 0;
+    if (needs_control_head)
+      {
+      stacksize++;
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+      }
+
+    if (ket == OP_KETRMIN)
+      {
+      if (needs_control_head) /* TMP2 held the control head until it was stored just above. */
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
+        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
+      }
+    else if (ket == OP_KETRMAX || has_alternatives)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+    }
+  else
+    {
+    if (ket != OP_KET || has_alternatives)
+      stacksize++;
+
+    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
+    allocate_stack(common, stacksize);
+
+    if (needs_control_head)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+
+    stacksize = needs_control_head ? 1 : 0;
+    if (ket != OP_KET || has_alternatives)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+      stacksize++;
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+      }
+    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
+    }
+  }
+else if (opcode == OP_CBRA || opcode == OP_SCBRA)
+  {
+  /* Saving the previous values. */
+  if (common->optimized_cbracket[offset >> 1] != 0)
+    {
+    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
+    allocate_stack(common, 2);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+    }
+  }
+else if (opcode == OP_SBRA || opcode == OP_SCOND)
+  {
+  /* Saving the previous value. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  }
+else if (has_alternatives)
+  {
+  /* Pushing the starting string pointer. */
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  }
+
+/* Generating code for the first alternative. For conditional groups the
+   condition is compiled (or decided at compile time) first. */
+if (opcode == OP_COND || opcode == OP_SCOND)
+  {
+  if (*matchingpath == OP_CREF)
+    {
+    SLJIT_ASSERT(has_alternatives);
+    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
+      CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+    matchingpath += 1 + IMM2_SIZE;
+    }
+  else if (*matchingpath == OP_DNCREF)
+    {
+    SLJIT_ASSERT(has_alternatives);
+
+    i = GET2(matchingpath, 1 + IMM2_SIZE); /* Number of name table entries to test. */
+    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
+    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); /* STR_PTR is used as scratch below and restored from TMP3. */
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+    slot += common->name_entry_size;
+    i--;
+    while (i-- > 0)
+      {
+      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+      OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
+      slot += common->name_entry_size;
+      }
+    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
+    matchingpath += 1 + 2 * IMM2_SIZE;
+    }
+  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
+    {
+    /* Never has other case. The test is decided at compile time; in this
+       branch stacksize is reused as its truth value (non-zero = true). */
+    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
+    SLJIT_ASSERT(!has_alternatives);
+
+    if (*matchingpath == OP_RREF)
+      {
+      stacksize = GET2(matchingpath, 1);
+      if (common->currententry == NULL)
+        stacksize = 0;
+      else if (stacksize == RREF_ANY)
+        stacksize = 1;
+      else if (common->currententry->start == 0)
+        stacksize = stacksize == 0;
+      else
+        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
+
+      if (stacksize != 0)
+        matchingpath += 1 + IMM2_SIZE;
+      }
+    else
+      {
+      if (common->currententry == NULL || common->currententry->start == 0)
+        stacksize = 0;
+      else
+        {
+        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
+        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
+        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
+        while (stacksize > 0) /* Leaves stacksize non-zero only when an entry equals i. */
+          {
+          if ((int)GET2(slot, 0) == i)
+            break;
+          slot += common->name_entry_size;
+          stacksize--;
+          }
+        }
+
+      if (stacksize != 0)
+        matchingpath += 1 + 2 * IMM2_SIZE;
+      }
+
+      /* The stacksize == 0 is a common "else" case. (This block is indented
+         one level too deep; it still belongs to the OP_RREF / OP_DNRREF
+         branch.) Only the second alternative, if there is one, is compiled. */
+      if (stacksize == 0)
+        {
+        if (*cc == OP_ALT)
+          {
+          matchingpath = cc + 1 + LINK_SIZE;
+          cc += GET(cc, 1);
+          }
+        else
+          matchingpath = cc;
+        }
+    }
+  else
+    {
+    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
+    /* Similar code as PUSH_BACKTRACK macro. */
+    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+      return NULL;
+    memset(assert, 0, sizeof(assert_backtrack));
+    assert->common.cc = matchingpath;
+    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
+    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
+    }
+  }
+
+compile_matchingpath(common, matchingpath, cc, backtrack);
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  return NULL;
+
+if (opcode == OP_ONCE)
+  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
+
+stacksize = 0; /* Count the slots pushed after the alternative matched; they are filled in the same order below. */
+if (repeat_type == OP_MINUPTO)
+  {
+  /* We need to preserve the counter. TMP2 will be used below. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+  stacksize++;
+  }
+if (ket != OP_KET || bra != OP_BRA)
+  stacksize++;
+if (offset != 0)
+  {
+  if (common->capture_last_ptr != 0)
+    stacksize++;
+  if (common->optimized_cbracket[offset >> 1] == 0)
+    stacksize += 2;
+  }
+if (has_alternatives && opcode != OP_ONCE)
+  stacksize++;
+
+if (stacksize > 0)
+  allocate_stack(common, stacksize);
+
+stacksize = 0;
+if (repeat_type == OP_MINUPTO)
+  {
+  /* TMP2 was set above. */
+  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
+  stacksize++;
+  }
+
+if (ket != OP_KET || bra != OP_BRA)
+  {
+  if (ket != OP_KET)
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+  else
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+  stacksize++;
+  }
+
+if (offset != 0)
+  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
+
+if (has_alternatives)
+  {
+  if (opcode != OP_ONCE) /* Slot for the alternative index; 0 is stored for the first alternative. */
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+  if (ket != OP_KETRMAX)
+    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
+  }
+
+/* Must be after the matchingpath label. */
+if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
+  {
+  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+  }
+
+if (ket == OP_KETRMAX)
+  {
+  if (repeat_type != 0)
+    {
+    if (has_alternatives)
+      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
+    /* Drop STR_PTR for greedy plus quantifier. */
+    if (opcode != OP_ONCE)
+      free_stack(common, 1);
+    }
+  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
+    {
+    if (has_alternatives)
+      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
+    /* Checking zero-length iteration. */
+    if (opcode != OP_ONCE)
+      {
+      CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
+      /* Drop STR_PTR for greedy plus quantifier. */
+      if (bra != OP_BRAZERO)
+        free_stack(common, 1);
+      }
+    else
+      /* TMP2 must contain the starting STR_PTR. */
+      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
+    }
+  else
+    JUMPTO(SLJIT_JUMP, rmax_label);
+  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
+  }
+
+if (repeat_type == OP_EXACT)
+  {
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
+  }
+else if (repeat_type == OP_UPTO)
+  {
+  /* We need to preserve the counter. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  }
+
+if (bra == OP_BRAZERO)
+  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
+
+if (bra == OP_BRAMINZERO)
+  {
+  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
+  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
+  if (braminzero != NULL)
+    {
+    JUMPHERE(braminzero);
+    /* We need to release the end pointer to perform the
+    backtrack for the zero-length iteration. When
+    framesize is < 0, OP_ONCE will do the release itself. */
+    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      }
+    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
+      free_stack(common, 1);
+    }
+  /* Continue to the normal backtrack. */
+  }
+
+if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
+  count_match(common);
+
+/* Skip the other alternatives. */
+while (*cc == OP_ALT)
+  cc += GET(cc, 1);
+cc += 1 + LINK_SIZE; /* Step over the closing ket. */
+
+/* Temporarily encoding the needs_control_head in framesize: the frame size
+   is shifted left by one and the flag is kept in the lowest bit. */
+if (opcode == OP_ONCE)
+  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
+return cc + repeat_length;
+}
+
+static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{ /* Emits the matching-path code for a group opened by OP_BRAPOS,
+   OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by
+   OP_BRAPOSZERO) and closed by OP_KETRPOS. Every alternative is compiled
+   together with its own backtracking path, and a successful alternative
+   jumps back to the loop label to try another iteration.
+
+   common - compiler state shared by the code generators.
+   cc     - points at OP_BRAPOSZERO or directly at the bracket opcode.
+   parent - not referenced by name in this body; NOTE(review): presumably
+            consumed by the PUSH_BACKTRACK macro - confirm.
+
+   Returns the byte code position after OP_KETRPOS, or NULL when sljit
+   reports a compiler error. */
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode;
+int private_data_ptr;
+int cbraprivptr = 0;
+BOOL needs_control_head;
+int framesize;
+int stacksize;
+int offset = 0; /* Non-zero only for the capturing variants. */
+BOOL zero = FALSE; /* TRUE when the group is preceded by OP_BRAPOSZERO. */
+pcre_uchar *ccbegin = NULL;
+int stack; /* Also contains the offset of control head. */
+struct sljit_label *loop = NULL;
+struct jump_list *emptymatch = NULL;
+
+PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
+if (*cc == OP_BRAPOSZERO)
+  {
+  zero = TRUE;
+  cc++;
+  }
+
+opcode = *cc;
+private_data_ptr = PRIVATE_DATA(cc);
+SLJIT_ASSERT(private_data_ptr != 0);
+BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
+switch(opcode)
+  {
+  case OP_BRAPOS:
+  case OP_SBRAPOS:
+  ccbegin = cc + 1 + LINK_SIZE;
+  break;
+
+  case OP_CBRAPOS:
+  case OP_SCBRAPOS:
+  offset = GET2(cc, 1 + LINK_SIZE);
+  /* This case cannot be optimized in the same way as
+  normal capturing brackets. */
+  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
+  cbraprivptr = OVECTOR_PRIV(offset);
+  offset <<= 1;
+  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+
+framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
+BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
+if (framesize < 0) /* No frame is saved: only a few fixed slots are pushed. */
+  {
+  if (offset != 0)
+    {
+    stacksize = 2;
+    if (common->capture_last_ptr != 0)
+      stacksize++;
+    }
+  else
+    stacksize = 1;
+
+  if (needs_control_head)
+    stacksize++;
+  if (!zero)
+    stacksize++;
+
+  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
+  allocate_stack(common, stacksize);
+  if (framesize == no_frame)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+
+  stack = 0;
+  if (offset != 0)
+    {
+    stack = 2;
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+    if (common->capture_last_ptr != 0)
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+    if (needs_control_head)
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+    if (common->capture_last_ptr != 0)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
+      stack = 3;
+      }
+    }
+  else
+    {
+    if (needs_control_head)
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    stack = 1;
+    }
+
+  if (needs_control_head) /* Step past the control head slot so the flag below lands in the last slot. */
+    stack++;
+  if (!zero) /* Flag slot: 1 until an iteration has matched (it is cleared to 0 in the loop). */
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
+  if (needs_control_head)
+    {
+    stack--; /* From here on stack is the index of the control head slot. */
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
+    }
+  }
+else
+  {
+  stacksize = framesize + 1;
+  if (!zero)
+    stacksize++;
+  if (needs_control_head)
+    stacksize++;
+  if (offset == 0)
+    stacksize++;
+  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
+
+  allocate_stack(common, stacksize);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
+
+  stack = 0;
+  if (!zero) /* Flag slot: 1 until an iteration has matched (it is cleared to 0 in the loop). */
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
+    stack = 1;
+    }
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
+    stack++;
+    }
+  if (offset == 0)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
+    stack++;
+    }
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
+  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
+  stack -= 1 + (offset == 0); /* When a control head was saved, this is the index of its slot (read back in the loop). */
+  }
+
+if (offset != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+
+loop = LABEL(); /* Every successful alternative jumps back here. */
+while (*cc != OP_KETRPOS)
+  {
+  backtrack->top = NULL;
+  backtrack->topbacktracks = NULL;
+  cc += GET(cc, 1); /* cc now points at the next OP_ALT or at OP_KETRPOS. */
+
+  compile_matchingpath(common, ccbegin, cc, backtrack);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return NULL;
+
+  if (framesize < 0)
+    {
+    if (framesize == no_frame)
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+
+    if (offset != 0)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+      if (common->capture_last_ptr != 0)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+      }
+    else
+      {
+      if (opcode == OP_SBRAPOS)
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      }
+
+    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) /* TMP1 holds the previous start position: leave the loop on an empty iteration. */
+      add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
+
+    if (!zero)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
+    }
+  else
+    {
+    if (offset != 0)
+      {
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+      if (common->capture_last_ptr != 0)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+      if (opcode == OP_SBRAPOS)
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
+      }
+
+    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) /* TMP1 holds the previous start position: leave the loop on an empty iteration. */
+      add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
+
+    if (!zero)
+      {
+      if (framesize < 0) /* NOTE(review): always false here, because this is the framesize >= 0 branch. */
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
+      else
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    }
+
+  if (needs_control_head)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
+
+  JUMPTO(SLJIT_JUMP, loop);
+  flush_stubs(common);
+
+  compile_backtrackingpath(common, backtrack->top); /* The backtracking code of this alternative is emitted right after its matching path. */
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return NULL;
+  set_jumps(backtrack->topbacktracks, LABEL());
+
+  if (framesize < 0)
+    {
+    if (offset != 0)
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
+    else
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    }
+  else
+    {
+    if (offset != 0)
+      {
+      /* Last alternative. */
+      if (*cc == OP_KETRPOS)
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
+      }
+    }
+
+  if (*cc == OP_KETRPOS)
+    break;
+  ccbegin = cc + 1 + LINK_SIZE;
+  }
+
+/* We don't have to restore the control head in case of a failed match. */
+
+backtrack->topbacktracks = NULL;
+if (!zero) /* If the flag slot is still non-zero, no iteration matched, so backtrack. */
+  {
+  if (framesize < 0)
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
+  else /* TMP2 is set to [private_data_ptr] above. */
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
+  }
+
+/* None of them matched. The empty-iteration exits collected in emptymatch
+   also land here. */
+set_jumps(emptymatch, LABEL());
+count_match(common);
+return cc + 1 + LINK_SIZE;
+}
+
+static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
+{
+int class_len;
+
+*opcode = *cc;
+if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
+  {
+  cc++;
+  *type = OP_CHAR;
+  }
+else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
+  {
+  cc++;
+  *type = OP_CHARI;
+  *opcode -= OP_STARI - OP_STAR;
+  }
+else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
+  {
+  cc++;
+  *type = OP_NOT;
+  *opcode -= OP_NOTSTAR - OP_STAR;
+  }
+else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
+  {
+  cc++;
+  *type = OP_NOTI;
+  *opcode -= OP_NOTSTARI - OP_STAR;
+  }
+else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
+  {
+  cc++;
+  *opcode -= OP_TYPESTAR - OP_STAR;
+  *type = 0;
+  }
+else
+  {
+  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
+  *type = *opcode;
+  cc++;
+  class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
+  *opcode = cc[class_len - 1];
+  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
+    {
+    *opcode -= OP_CRSTAR - OP_STAR;
+    if (end != NULL)
+      *end = cc + class_len;
+    }
+  else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
+    {
+    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
+    if (end != NULL)
+      *end = cc + class_len;
+    }
+  else
+    {
+    SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
+    *max = GET2(cc, (class_len + IMM2_SIZE));
+    *min = GET2(cc, class_len);
+
+    if (*min == 0)
+      {
+      SLJIT_ASSERT(*max != 0);
+      *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
+      }
+    if (*max == *min)
+      *opcode = OP_EXACT;
+
+    if (end != NULL)
+      *end = cc + class_len + 2 * IMM2_SIZE;
+    }
+  return cc;
+  }
+
+if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
+  {
+  *max = GET2(cc, 0);
+  cc += IMM2_SIZE;
+  }
+
+if (*type == 0)
+  {
+  *type = *cc;
+  if (end != NULL)
+    *end = next_opcode(common, cc);
+  cc++;
+  return cc;
+  }
+
+if (end != NULL)
+  {
+  *end = cc + 1;
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
+#endif
+  }
+return cc;
+}
+
+static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{ /* Emits the matching path of a repeated single item (a quantified
+     character, negated character, character type or character class).
+     get_iterator_parameters() decodes the repeat at cc into opcode, type, min
+     and max and stores the address of the following item in `end`, which is
+     the return value. For the backtrackable repeats the label stored in
+     iterator_backtrack.matchingpath is the point that
+     compile_iterator_backtrackingpath() jumps back to.
+     NOTE(review): PUSH_BACKTRACK is defined outside this chunk; its NULL
+     argument is presumably the value returned on allocation failure -
+     confirm. */
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode;
+pcre_uchar type;
+int max = -1, min = -1; /* -1 unless get_iterator_parameters() writes them. */
+pcre_uchar* end;
+jump_list *nomatch = NULL;
+struct sljit_jump *jump = NULL;
+struct sljit_label *label;
+int private_data_ptr = PRIVATE_DATA(cc);
+int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG); /* Iterator state: two stack slots when no private data is assigned, two local words otherwise. */
+int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
+int tmp_base, tmp_offset; /* Scratch location used by OP_EXACT and the possessive cases. */
+
+PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
+
+switch(type) /* Choose the scratch location: TMP3 for the first group, the POSSESSIVE0 local for the second. */
+  {
+  case OP_NOT_DIGIT:
+  case OP_DIGIT:
+  case OP_NOT_WHITESPACE:
+  case OP_WHITESPACE:
+  case OP_NOT_WORDCHAR:
+  case OP_WORDCHAR:
+  case OP_ANY:
+  case OP_ALLANY:
+  case OP_ANYBYTE:
+  case OP_ANYNL:
+  case OP_NOT_HSPACE:
+  case OP_HSPACE:
+  case OP_NOT_VSPACE:
+  case OP_VSPACE:
+  case OP_CHAR:
+  case OP_CHARI:
+  case OP_NOT:
+  case OP_NOTI:
+  case OP_CLASS:
+  case OP_NCLASS:
+  tmp_base = TMP3;
+  tmp_offset = 0;
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  /* Fall through. */
+
+  case OP_EXTUNI:
+  case OP_XCLASS:
+  case OP_NOTPROP:
+  case OP_PROP:
+  tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
+  tmp_offset = POSSESSIVE0;
+  break;
+  }
+
+switch(opcode)
+  {
+  case OP_STAR:
+  case OP_PLUS:
+  case OP_UPTO:
+  case OP_CRRANGE:
+  if (type == OP_ANYNL || type == OP_EXTUNI) /* State goes on the stack: a zero terminator below the saved STR_PTR values. */
+    {
+    SLJIT_ASSERT(private_data_ptr == 0);
+    if (opcode == OP_STAR || opcode == OP_UPTO)
+      {
+      allocate_stack(common, 2);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
+      }
+    else
+      {
+      allocate_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+
+    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); /* Repeat counter. */
+
+    label = LABEL();
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+      if (opcode == OP_CRRANGE && min > 0)
+        CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label); /* Below min: loop again without saving a position. */
+      if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
+        jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); /* max reached: leave the loop; this position is not pushed. */
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
+      }
+
+    /* We cannot use TMP3 because of this allocate_stack. */
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    JUMPTO(SLJIT_JUMP, label);
+    if (jump != NULL)
+      JUMPHERE(jump);
+    }
+  else
+    {
+    if (opcode == OP_PLUS)
+      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); /* Mandatory first match. */
+    if (private_data_ptr == 0)
+      allocate_stack(common, 2);
+    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); /* offset0: position reached so far. */
+    if (opcode <= OP_PLUS) /* True for OP_STAR and OP_PLUS here; NOTE(review): relies on the opcode numbering - confirm. */
+      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); /* offset1: position the backtracking path does not go below. */
+    else
+      OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); /* offset1: repeat counter, starting at 1. */
+    label = LABEL();
+    compile_char1_matchingpath(common, type, cc, &nomatch);
+    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+    if (opcode <= OP_PLUS)
+      JUMPTO(SLJIT_JUMP, label);
+    else if (opcode == OP_CRRANGE && max == 0) /* max == 0: no upper limit, only count. */
+      {
+      OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
+      JUMPTO(SLJIT_JUMP, label);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
+      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+      OP1(SLJIT_MOV, base, offset1, TMP1, 0);
+      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label); /* Loop while fewer than max repeats matched (the counter is biased by 1). */
+      }
+    set_jumps(nomatch, LABEL());
+    if (opcode == OP_CRRANGE)
+      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1)); /* Fewer than min repeats matched: fail. */
+    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); /* Continue from the last successful position. */
+    }
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_MINSTAR:
+  case OP_MINPLUS:
+  if (opcode == OP_MINPLUS)
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  if (private_data_ptr == 0)
+    allocate_stack(common, 1);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); /* Only the position is saved; further repeats are tried in the backtracking path. */
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_MINUPTO:
+  case OP_CRMINRANGE:
+  if (private_data_ptr == 0)
+    allocate_stack(common, 2);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); /* Repeat counter, starting at 1. */
+  if (opcode == OP_CRMINRANGE)
+    add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); /* Enter through the backtracking path, which matches the mandatory repeats. */
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_QUERY:
+  case OP_MINQUERY:
+  if (private_data_ptr == 0)
+    allocate_stack(common, 1);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  if (opcode == OP_QUERY) /* OP_QUERY tries the item now; OP_MINQUERY leaves it to the backtracking path. */
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_EXACT:
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); /* Countdown of the remaining repeats. */
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_C_NOT_ZERO, label);
+  break;
+
+  case OP_POSSTAR:
+  case OP_POSPLUS:
+  case OP_POSUPTO:
+  if (opcode == OP_POSPLUS)
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  if (opcode == OP_POSUPTO)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max); /* Countdown of the remaining repeats. */
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); /* Last good position. */
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &nomatch);
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  if (opcode != OP_POSUPTO)
+    JUMPTO(SLJIT_JUMP, label);
+  else
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_C_NOT_ZERO, label);
+    }
+  set_jumps(nomatch, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); /* Restore the last good position; no backtrack state is kept. */
+  break;
+
+  case OP_POSQUERY:
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  compile_char1_matchingpath(common, type, cc, &nomatch);
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  set_jumps(nomatch, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+  break;
+
+  case OP_CRPOSRANGE:
+  /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_C_NOT_ZERO, label);
+
+  if (max != 0) /* max == 0 is handled below as "no upper limit". */
+    {
+    SLJIT_ASSERT(max - min > 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
+    }
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &nomatch);
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  if (max == 0)
+    JUMPTO(SLJIT_JUMP, label);
+  else
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_C_NOT_ZERO, label);
+    }
+  set_jumps(nomatch, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+
+count_match(common);
+return end;
+}
+
+static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{ /* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT and
+     returns cc + 1. OP_FAIL becomes an unconditional jump to the
+     backtracking path. The accepting opcodes jump to common->accept_label
+     when it is already set; otherwise the jump is queued on common->accept.
+     An OP_ACCEPT compiled with currententry == NULL and might_be_empty set
+     first gets the notempty / notempty_atstart checks emitted below. */
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+if (*cc == OP_FAIL)
+  {
+  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
+  return cc + 1;
+  }
+
+if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
+  {
+  /* No need to check notempty conditions. */
+  if (common->accept_label == NULL)
+    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
+  else
+    JUMPTO(SLJIT_JUMP, common->accept_label);
+  return cc + 1;
+  }
+
+if (common->accept_label == NULL) /* Accept at once when STR_PTR differs from the value stored at OVECTOR(0). */
+  add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
+else
+  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
+add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); /* notempty is set: backtrack. */
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
+if (common->accept_label == NULL) /* notempty_atstart is clear: accept. */
+  add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+else
+  CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+if (common->accept_label == NULL) /* STR_PTR differs from jit_arguments.str: accept. */
+  add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
+else
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
+add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); /* Every accept test failed: backtrack. */
+return cc + 1;
+}
+
+/* Emits code for OP_CLOSE. The group number is read with GET2 at cc + 1.
+STR_PTR is stored as the second ovector word of that group and, unless the
+group is flagged in common->optimized_cbracket, the value kept at
+OVECTOR_PRIV is copied into the first ovector word. Nothing is emitted when
+common->currententry is set, because the data would be discarded anyway.
+Returns the address of the next opcode. */
+static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
+{
+DEFINE_COMPILER;
+pcre_uchar *next = cc + 1 + IMM2_SIZE;
+int group = GET2(cc, 1);
+BOOL copy_start = common->optimized_cbracket[group] == 0;
+int slot = group << 1;
+
+if (common->currententry != NULL)
+  return next;
+
+if (copy_start)
+  {
+  /* TMP1 is loaded before the second word is stored, as in the original order. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(group));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(slot + 1), STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(slot), TMP1, 0);
+  }
+else
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(slot + 1), STR_PTR, 0);
+
+return next;
+}
+
+/* Emits the matching path of a control verb and pushes a backtrack_common
+record for it. OP_SKIP saves STR_PTR in a newly allocated stack slot.
+OP_PRUNE_ARG and OP_THEN_ARG store the address cc + 2 in the mark_ptr local
+and in jit_arguments.mark_ptr. The remaining verbs emit nothing here. Returns
+the address just past the verb, including its argument for the _ARG forms. */
+static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+const pcre_uchar verb = *cc;
+pcre_uchar *next;
+
+switch(verb)
+  {
+  case OP_PRUNE_ARG:
+  case OP_SKIP_ARG:
+  case OP_THEN_ARG:
+  /* cc[1] is added to two further units to step over the argument. */
+  next = cc + 1 + 2 + cc[1];
+  break;
+
+  default:
+  next = cc + 1;
+  break;
+  }
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+switch(verb)
+  {
+  case OP_SKIP:
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  break;
+
+  case OP_PRUNE_ARG:
+  case OP_THEN_ARG:
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
+  break;
+
+  default:
+  break;
+  }
+
+return next;
+}
+
+static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP }; /* One-opcode buffer that the then_trap backtrack records use as their common.cc. */
+
+static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
+{ /* Pushes a then_trap backtrack record for the code range [cc, ccend),
+     makes it common->then_trap, and emits code that builds a three word
+     entry on the stack (previous control head, type_then_trap, start
+     offset) and makes control_head_ptr refer to it. When get_framesize()
+     reports a frame (framesize >= 0), room for it is allocated in the same
+     block and init_frame() fills it in. */
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+BOOL needs_control_head;
+int size;
+
+PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
+common->then_trap = BACKTRACK_AS(then_trap_backtrack);
+BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
+BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
+BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
+
+size = BACKTRACK_AS(then_trap_backtrack)->framesize;
+size = 3 + (size < 0 ? 0 : size); /* Three words for the trap entry plus the frame, if any. */
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); /* Previous control head, stored in the entry below. */
+allocate_stack(common, size);
+if (size > 3)
+  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
+else
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
+
+size = BACKTRACK_AS(then_trap_backtrack)->framesize; /* From here on size is the frame size again. */
+if (size >= 0)
+  init_frame(common, cc, ccend, size - 1, 0, FALSE);
+}
+
+static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
+{ /* Emits the matching path for the items in [cc, ccend) by dispatching
+     each opcode to its compile_* routine, which returns the address of the
+     next item. ccend must point at OP_END or at an opcode in the range
+     OP_ALT .. OP_KETRPOS (see the assert). When common->then_offsets marks
+     the starting position, the items are bracketed by two then_trap
+     backtrack records. Returns early, without finishing, when a routine
+     yields NULL or an unknown opcode is met. */
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+BOOL has_then_trap = FALSE;
+then_trap_backtrack *save_then_trap = NULL;
+
+SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
+
+if (common->has_then && common->then_offsets[cc - common->start] != 0)
+  {
+  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
+  has_then_trap = TRUE;
+  save_then_trap = common->then_trap; /* Restored after the loop. */
+  /* Tail item on backtrack. */
+  compile_then_trap_matchingpath(common, cc, ccend, parent);
+  }
+
+while (cc < ccend)
+  {
+  switch(*cc)
+    {
+    case OP_SOD:
+    case OP_SOM:
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    case OP_NOT_DIGIT:
+    case OP_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+    case OP_ANYBYTE:
+    case OP_NOTPROP:
+    case OP_PROP:
+    case OP_ANYNL:
+    case OP_NOT_HSPACE:
+    case OP_HSPACE:
+    case OP_NOT_VSPACE:
+    case OP_VSPACE:
+    case OP_EXTUNI:
+    case OP_EODN:
+    case OP_EOD:
+    case OP_CIRC:
+    case OP_CIRCM:
+    case OP_DOLL:
+    case OP_DOLLM:
+    case OP_NOT:
+    case OP_NOTI:
+    case OP_REVERSE:
+    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); /* Failure jumps go on the newest backtrack record's nextbacktracks list, or on the parent's own list when no record exists yet. */
+    break;
+
+    case OP_SET_SOM:
+    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); /* The old OVECTOR(0) is saved on the stack and replaced by STR_PTR. */
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+    cc++;
+    break;
+
+    case OP_CHAR:
+    case OP_CHARI:
+    if (common->mode == JIT_COMPILE) /* compile_charn_matchingpath() is used only in JIT_COMPILE mode. */
+      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    else
+      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    break;
+
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_EXACT:
+    case OP_POSSTAR:
+    case OP_POSPLUS:
+    case OP_POSQUERY:
+    case OP_POSUPTO:
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_EXACTI:
+    case OP_POSSTARI:
+    case OP_POSPLUSI:
+    case OP_POSQUERYI:
+    case OP_POSUPTOI:
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTEXACT:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSUPTO:
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTEXACTI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTPOSQUERYI:
+    case OP_NOTPOSUPTOI:
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEEXACT:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEPOSQUERY:
+    case OP_TYPEPOSUPTO:
+    cc = compile_iterator_matchingpath(common, cc, parent);
+    break;
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE) /* A repeat opcode follows the 32-byte class data. */
+      cc = compile_iterator_matchingpath(common, cc, parent);
+    else
+      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    break;
+
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+    case OP_XCLASS:
+    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) /* GET(cc, 1) is the offset of the opcode that follows the class. */
+      cc = compile_iterator_matchingpath(common, cc, parent);
+    else
+      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    break;
+#endif
+
+    case OP_REF:
+    case OP_REFI:
+    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
+      cc = compile_ref_iterator_matchingpath(common, cc, parent);
+    else
+      {
+      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+      cc += 1 + IMM2_SIZE;
+      }
+    break;
+
+    case OP_DNREF:
+    case OP_DNREFI:
+    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
+      cc = compile_ref_iterator_matchingpath(common, cc, parent);
+    else
+      {
+      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+      cc += 1 + 2 * IMM2_SIZE;
+      }
+    break;
+
+    case OP_RECURSE:
+    cc = compile_recurse_matchingpath(common, cc, parent);
+    break;
+
+    case OP_CALLOUT:
+    cc = compile_callout_matchingpath(common, cc, parent);
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
+    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
+    break;
+
+    case OP_BRAMINZERO:
+    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
+    cc = bracketend(cc + 1); /* The bracket itself is not compiled here; cc moves to the address bracketend() returns. */
+    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
+      {
+      allocate_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      }
+    else
+      {
+      allocate_stack(common, 2);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
+      }
+    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
+    if (cc[1] > OP_ASSERTBACK_NOT)
+      count_match(common);
+    break;
+
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRA:
+    case OP_CBRA:
+    case OP_COND:
+    case OP_SBRA:
+    case OP_SCBRA:
+    case OP_SCOND:
+    cc = compile_bracket_matchingpath(common, cc, parent);
+    break;
+
+    case OP_BRAZERO:
+    if (cc[1] > OP_ASSERTBACK_NOT) /* Opcodes above OP_ASSERTBACK_NOT go to the bracket compiler, the rest to the assert compiler. */
+      cc = compile_bracket_matchingpath(common, cc, parent);
+    else
+      {
+      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
+      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
+      }
+    break;
+
+    case OP_BRAPOS:
+    case OP_CBRAPOS:
+    case OP_SBRAPOS:
+    case OP_SCBRAPOS:
+    case OP_BRAPOSZERO:
+    cc = compile_bracketpos_matchingpath(common, cc, parent);
+    break;
+
+    case OP_MARK:
+    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
+    SLJIT_ASSERT(common->mark_ptr != 0);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); /* Previous mark, saved on the stack below. */
+    allocate_stack(common, common->has_skip_arg ? 5 : 1);
+    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
+    if (common->has_skip_arg) /* Also build a type_mark entry (previous head, type, cc + 2, STR_PTR) and make it the control head. */
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+      }
+    cc += 1 + 2 + cc[1]; /* NOTE(review): presumably opcode + length unit + cc[1] name units + terminator - confirm. */
+    break;
+
+    case OP_PRUNE:
+    case OP_PRUNE_ARG:
+    case OP_SKIP:
+    case OP_SKIP_ARG:
+    case OP_THEN:
+    case OP_THEN_ARG:
+    case OP_COMMIT:
+    cc = compile_control_verb_matchingpath(common, cc, parent);
+    break;
+
+    case OP_FAIL:
+    case OP_ACCEPT:
+    case OP_ASSERT_ACCEPT:
+    cc = compile_fail_accept_matchingpath(common, cc, parent);
+    break;
+
+    case OP_CLOSE:
+    cc = compile_close_matchingpath(common, cc);
+    break;
+
+    case OP_SKIPZERO:
+    cc = bracketend(cc + 1); /* Nothing is emitted for the skipped group. */
+    break;
+
+    default:
+    SLJIT_ASSERT_STOP();
+    return;
+    }
+  if (cc == NULL) /* A compile_* routine yielded NULL: give up. */
+    return;
+  }
+
+if (has_then_trap)
+  {
+  /* Head item on backtrack. */
+  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
+  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
+  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
+  common->then_trap = save_then_trap;
+  }
+SLJIT_ASSERT(cc == ccend);
+}
+
+#undef PUSH_BACKTRACK
+#undef PUSH_BACKTRACK_NOVALUE
+#undef BACKTRACK_AS /* The three matching-path helper macros are not available below this line. */
+
+#define COMPILE_BACKTRACKINGPATH(current) \
+  do \
+    { \
+    compile_backtrackingpath(common, (current)); \
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
+      return; \
+    } \
+  while (0) /* Compiles a backtracking path and returns from the enclosing void function on a compiler error; needs `common` and `compiler` in scope. */
+
+#define CURRENT_AS(type) ((type *)current) /* Casts the local variable `current` to the given backtrack type. */
+
+static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{ /* Emits the backtracking path of a single-item repeat whose matching
+     path was produced by compile_iterator_matchingpath(). The repeat is
+     decoded again from current->cc. The emitted code either adjusts the
+     saved iterator state and jumps back to the recorded matchingpath
+     label, or releases the state and falls through so that the preceding
+     backtrack runs next. base/offset0/offset1 address the same two state
+     words as in the matching path. */
+DEFINE_COMPILER;
+pcre_uchar *cc = current->cc;
+pcre_uchar opcode;
+pcre_uchar type;
+int max = -1, min = -1;
+struct sljit_label *label = NULL;
+struct sljit_jump *jump = NULL;
+jump_list *jumplist = NULL;
+int private_data_ptr = PRIVATE_DATA(cc);
+int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
+int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
+
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
+
+switch(opcode)
+  {
+  case OP_STAR:
+  case OP_PLUS:
+  case OP_UPTO:
+  case OP_CRRANGE:
+  if (type == OP_ANYNL || type == OP_EXTUNI)
+    {
+    SLJIT_ASSERT(private_data_ptr == 0);
+    set_jumps(current->topbacktracks, LABEL());
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); /* Pop the most recently saved position. */
+    free_stack(common, 1);
+    CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); /* A zero is the terminator pushed by the matching path: no positions left. */
+    }
+  else
+    {
+    if (opcode == OP_UPTO)
+      min = 0;
+    if (opcode <= OP_PLUS)
+      {
+      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+      jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1); /* Already at the lowest allowed position: give up. */
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
+      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+      jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1); /* The counter (biased by 1) cannot drop below min: give up. */
+      OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
+      }
+    skip_char_back(common); /* NOTE(review): presumably moves STR_PTR back by one character - confirm. */
+    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+    if (opcode == OP_CRRANGE)
+      set_jumps(current->topbacktracks, LABEL()); /* The "fewer than min" failure lands here, before the state is released. */
+    JUMPHERE(jump);
+    if (private_data_ptr == 0)
+      free_stack(common, 2);
+    if (opcode == OP_PLUS)
+      set_jumps(current->topbacktracks, LABEL()); /* The mandatory first match failed before any state was allocated. */
+    }
+  break;
+
+  case OP_MINSTAR:
+  case OP_MINPLUS:
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  compile_char1_matchingpath(common, type, cc, &jumplist); /* Try one more repeat from the saved position. */
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  set_jumps(jumplist, LABEL());
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  if (opcode == OP_MINPLUS)
+    set_jumps(current->topbacktracks, LABEL());
+  break;
+
+  case OP_MINUPTO:
+  case OP_CRMINRANGE:
+  if (opcode == OP_CRMINRANGE)
+    {
+    label = LABEL();
+    set_jumps(current->topbacktracks, label); /* Entry point used by the matching path for the mandatory repeats. */
+    }
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  compile_char1_matchingpath(common, type, cc, &jumplist);
+
+  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
+
+  if (opcode == OP_CRMINRANGE)
+    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label); /* Keep matching until min repeats are done. */
+
+  if (opcode == OP_CRMINRANGE && max == 0) /* max == 0: no upper limit. */
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  else
+    CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath); /* Continue with the rest of the pattern while the biased counter is below max + 2. */
+
+  set_jumps(jumplist, LABEL());
+  if (private_data_ptr == 0)
+    free_stack(common, 2);
+  break;
+
+  case OP_QUERY:
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); /* Zero the slot so that the next backtrack falls through. */
+  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); /* Retry from the position saved before the optional item. */
+  jump = JUMP(SLJIT_JUMP);
+  set_jumps(current->topbacktracks, LABEL()); /* The optional item itself failed to match: continue without it. */
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  JUMPHERE(jump);
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  break;
+
+  case OP_MINQUERY:
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
+  jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); /* Slot already zeroed: the item was tried before. */
+  compile_char1_matchingpath(common, type, cc, &jumplist); /* Now try the optional item once. */
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  set_jumps(jumplist, LABEL());
+  JUMPHERE(jump);
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  break;
+
+  case OP_EXACT:
+  case OP_POSPLUS:
+  case OP_CRPOSRANGE:
+  set_jumps(current->topbacktracks, LABEL()); /* No saved state: only bind the failure jumps of the mandatory part. */
+  break;
+
+  case OP_POSSTAR:
+  case OP_POSQUERY:
+  case OP_POSUPTO:
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+}
+
+/* Emits the backtracking path of a repeated back reference. The repeat
+opcode is read from behind the reference operands: one IMM2 operand when
+current->cc starts with OP_REF or OP_REFI, two otherwise. Even repeat
+opcodes take the "maximize" path; odd ones take the other path
+(NOTE(review): presumably the minimizing repeats - confirm against the
+opcode table). */
+static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+pcre_uchar *code = current->cc;
+BOOL simple_ref = (*code == OP_REF || *code == OP_REFI);
+pcre_uchar repeat = code[simple_ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
+
+if ((repeat & 0x1) != 0)
+  {
+  /* A non-zero saved position means another attempt is possible; otherwise
+  release the iterator's stack words. */
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+  set_jumps(current->topbacktracks, LABEL());
+  free_stack(common, simple_ref ? 2 : 3);
+  return;
+  }
+
+/* Maximize case: pop one saved position; a zero ends the retries. */
+set_jumps(current->topbacktracks, LABEL());
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+free_stack(common, 1);
+CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+}
+
+/* Backtracking path of OP_RECURSE. When the recursion was inlined, the
+backtracking code of the inlined pattern is generated and the pending
+backtracks are attached behind it. Otherwise the pending backtracks are
+attached here and the values previously pushed on the stack are written back:
+OVECTOR(0) when common->has_set_som is set, and the mark slot when
+common->mark_ptr is non-zero. */
+static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+BOOL restore_som;
+BOOL restore_mark;
+
+if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
+  {
+  compile_backtrackingpath(common, current->top);
+  set_jumps(current->topbacktracks, LABEL());
+  return;
+  }
+
+set_jumps(current->topbacktracks, LABEL());
+
+restore_som = common->has_set_som;
+restore_mark = common->mark_ptr != 0;
+
+if (restore_som && restore_mark)
+  {
+  /* Two saved values: STACK(0) goes to OVECTOR(0), STACK(1) to the mark. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+  free_stack(common, 2);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
+  return;
+  }
+
+if (restore_som || restore_mark)
+  {
+  /* Exactly one saved value; its destination depends on which flag is set. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
+  }
+}
+
+/* Backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT, OP_ASSERTBACK,
+OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO. OP_BRAMINZERO must not
+reach this function. For the OP_BRAZERO form the value at STACK(0) is loaded
+into STR_PTR first and later compared with zero to choose between resuming
+the recorded matching path and continuing to backtrack. */
+static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+assert_backtrack *info = CURRENT_AS(assert_backtrack);
+pcre_uchar *code = current->cc;
+struct sljit_jump *skip_jump = NULL;
+BOOL optional;
+BOOL positive;
+BOOL negative;
+
+SLJIT_ASSERT(*code != OP_BRAMINZERO);
+optional = (*code == OP_BRAZERO);
+if (optional)
+  {
+  code++;
+  SLJIT_ASSERT(current->topbacktracks == NULL);
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  }
+
+positive = (*code == OP_ASSERT || *code == OP_ASSERTBACK);
+negative = (*code == OP_ASSERT_NOT || *code == OP_ASSERTBACK_NOT);
+
+if (info->framesize < 0)
+  {
+  /* No frame was recorded: only the pending backtracks and, for the
+  optional form, the zero / non-zero decision remain. */
+  set_jumps(current->topbacktracks, LABEL());
+  if (!optional)
+    return;
+
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, info->matchingpath);
+  free_stack(common, 1);
+  return;
+  }
+
+if (optional)
+  {
+  if (negative)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+    CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, info->matchingpath);
+    free_stack(common, 1);
+    return;
+    }
+  free_stack(common, 1);
+  skip_jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+  }
+
+if (positive)
+  {
+  /* Reload STACK_TOP from the private data slot, call the revertframes
+  routine, then refill the private data slot from the frame. */
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), info->private_data_ptr);
+  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), info->private_data_ptr, SLJIT_MEM1(STACK_TOP), info->framesize * sizeof(sljit_sw));
+  }
+
+/* Both the positive and the remaining case attach the backtracks here. */
+set_jumps(current->topbacktracks, LABEL());
+
+if (optional)
+  {
+  /* The slot released above is taken back without a limit check and
+  cleared before the matching path is resumed. */
+  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+  JUMPTO(SLJIT_JUMP, info->matchingpath);
+  JUMPHERE(skip_jump);
+  }
+}
+
+static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{ /* Emits the backtracking-path code for one bracket group.
+   compile_backtrackingpath() dispatches here for OP_ONCE, OP_ONCE_NC, OP_BRA,
+   OP_CBRA, OP_COND, OP_SBRA, OP_SCBRA and OP_SCOND, and for OP_BRAZERO when
+   the opcode after it is not an assertion;
+   compile_braminzero_backtrackingpath() also calls it directly.
+   The function first undoes what was saved on the stack for the last
+   alternative that matched, then generates matching and backtracking code
+   for every remaining alternative, and finally emits the code that decides
+   between another iteration and giving up the whole group.
+   common:  compiler state.
+   current: backtrack record of the group; accessed as bracket_backtrack. */
+DEFINE_COMPILER;
+int opcode, stacksize, alt_count, alt_max;
+int offset = 0; /* Twice the 2-byte value that follows the link of OP_CBRA / OP_SCBRA; 0 otherwise. */
+int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
+int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; /* Filled from the private data of the ket, if any. */
+pcre_uchar *cc = current->cc;
+pcre_uchar *ccbegin;
+pcre_uchar *ccprev;
+pcre_uchar bra = OP_BRA; /* OP_BRAZERO / OP_BRAMINZERO when the group is prefixed by one of them. */
+pcre_uchar ket;
+assert_backtrack *assert;
+BOOL has_alternatives;
+BOOL needs_control_head = FALSE;
+struct sljit_jump *brazero = NULL;
+struct sljit_jump *alt1 = NULL;
+struct sljit_jump *alt2 = NULL;
+struct sljit_jump *once = NULL;
+struct sljit_jump *cond = NULL;
+struct sljit_label *rmin_label = NULL;
+struct sljit_label *exact_label = NULL;
+
+if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
+  {
+  bra = *cc;
+  cc++;
+  }
+
+opcode = *cc;
+ccbegin = bracketend(cc) - 1 - LINK_SIZE; /* Temporarily points at the closing ket opcode. */
+ket = *ccbegin;
+if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
+  {
+  /* A plain OP_KET with private data carries a repeat description:
+  counter location, repeat type and repeat count. */
+  repeat_ptr = PRIVATE_DATA(ccbegin);
+  repeat_type = PRIVATE_DATA(ccbegin + 2);
+  repeat_count = PRIVATE_DATA(ccbegin + 3);
+  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
+  /* OP_UPTO / OP_MINUPTO repeats are handled through the KETRMAX / KETRMIN code paths. */
+  if (repeat_type == OP_UPTO)
+    ket = OP_KETRMAX;
+  if (repeat_type == OP_MINUPTO)
+    ket = OP_KETRMIN;
+  }
+ccbegin = cc; /* From here on ccbegin is the opening bracket opcode. */
+cc += GET(cc, 1); /* Advance by the link value; the opcode found there is tested for OP_ALT and the kets below. */
+has_alternatives = *cc == OP_ALT;
+if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
+  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
+if (opcode == OP_CBRA || opcode == OP_SCBRA)
+  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
+if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
+  opcode = OP_SCOND; /* An OP_COND closed by a repeating ket is treated as OP_SCOND. */
+if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
+  opcode = OP_ONCE; /* OP_ONCE_NC shares all OP_ONCE code paths below. */
+
+alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
+
+/* Decoding the needs_control_head in framesize: the lowest bit of
+u.framesize holds the flag and the remaining bits the frame size. Note that
+the shift rewrites u.framesize in the backtrack record itself. */
+if (opcode == OP_ONCE)
+  {
+  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
+  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
+  }
+
+if (ket != OP_KET && repeat_type != 0)
+  {
+  /* Read the value saved at STACK(0) back into the repeat counter (plus one
+  for OP_UPTO). TMP1 is used in OP_KETRMIN below. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  if (repeat_type == OP_UPTO)
+    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
+  else
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
+  }
+
+if (ket == OP_KETRMAX)
+  {
+  if (bra == OP_BRAZERO)
+    {
+    /* A zero at STACK(0) skips to the brazero landing point emitted at the end of this function. */
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+    brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
+    }
+  }
+else if (ket == OP_KETRMIN)
+  {
+  if (bra != OP_BRAMINZERO)
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    if (repeat_type != 0)
+      {
+      /* TMP1 was set a few lines above; a non-zero value goes to the recursive matching path. */
+      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+      /* Drop STR_PTR for non-greedy plus quantifier. */
+      if (opcode != OP_ONCE)
+        free_stack(common, 1);
+      }
+    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
+      {
+      /* Checking zero-length iteration: the recursive matching path is only
+      taken when STR_PTR differs from the stored start value. */
+      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
+        CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+      else
+        {
+        /* With a frame, the value to compare against is read through the pointer kept in the private data slot. */
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+        }
+      /* Drop STR_PTR for non-greedy plus quantifier. */
+      if (opcode != OP_ONCE)
+        free_stack(common, 1);
+      }
+    else
+      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+    }
+  rmin_label = LABEL(); /* Target of the loop-back test in the OP_KETRMIN tail at the end of this function. */
+  if (repeat_type != 0)
+    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+  }
+else if (bra == OP_BRAZERO)
+  {
+  /* Non-repeating optional group: a non-zero value at STACK(0) skips to the brazero landing point. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
+  }
+else if (repeat_type == OP_EXACT)
+  {
+  /* The counter restarts at 1; exact_label is the loop head used by the OP_EXACT tail below. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+  exact_label = LABEL();
+  }
+
+if (offset != 0)
+  {
+  /* Capturing group: write the saved values back to OVECTOR(offset) and
+  OVECTOR(offset + 1), and to the capture_last slot when that is in use. */
+  if (common->capture_last_ptr != 0)
+    {
+    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+    free_stack(common, 3);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
+    }
+  else if (common->optimized_cbracket[offset >> 1] == 0)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    free_stack(common, 2);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+    }
+  }
+
+if (SLJIT_UNLIKELY(opcode == OP_ONCE))
+  {
+  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
+    {
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+    }
+  once = JUMP(SLJIT_JUMP); /* Lands in the OP_ONCE clean-up near the end of this function. */
+  }
+else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
+  {
+  if (has_alternatives)
+    {
+    /* Always exactly one alternative. The value at STACK(0) selects it. */
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+
+    alt_max = 2;
+    alt1 = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
+    }
+  }
+else if (has_alternatives)
+  {
+  /* The value at STACK(0) is the alternative index stored as a multiple of
+  sizeof(sljit_uw) (see alt_count below); it selects where to continue. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+
+  if (alt_max > 4)
+    {
+    /* Table jump if alt_max is greater than 4. */
+    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)common->read_only_data_ptr);
+    add_label_addr(common);
+    }
+  else
+    {
+    /* At most four alternatives: a short chain of compares instead of a table. */
+    if (alt_max == 4)
+      alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
+    alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
+    }
+  }
+
+COMPILE_BACKTRACKINGPATH(current->top); /* Backtracking code of the first alternative's contents. */
+if (current->topbacktracks)
+  set_jumps(current->topbacktracks, LABEL());
+
+if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
+  {
+  /* Conditional block always has at most one alternative. */
+  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
+    {
+    /* The condition is an assertion; its backtrack record is kept in u.assert. */
+    SLJIT_ASSERT(has_alternatives);
+    assert = CURRENT_AS(bracket_backtrack)->u.assert;
+    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+      }
+    cond = JUMP(SLJIT_JUMP); /* Resolved after the alternatives loop below. */
+    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
+    }
+  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
+    {
+    SLJIT_ASSERT(has_alternatives);
+    cond = JUMP(SLJIT_JUMP); /* Resolved after the alternatives loop below. */
+    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
+    }
+  else
+    SLJIT_ASSERT(!has_alternatives);
+  }
+
+if (has_alternatives)
+  {
+  /* One iteration per remaining alternative. alt_count is the index that is
+  stored on the stack for the alternative, in steps of sizeof(sljit_uw). */
+  alt_count = sizeof(sljit_uw);
+  do
+    {
+    current->top = NULL;
+    current->topbacktracks = NULL;
+    current->nextbacktracks = NULL;
+    /* Conditional blocks always have an additional alternative, even if it is empty. */
+    if (*cc == OP_ALT)
+      {
+      ccprev = cc + 1 + LINK_SIZE;
+      cc += GET(cc, 1);
+      if (opcode != OP_COND && opcode != OP_SCOND)
+        {
+        /* Reload STR_PTR for the new alternative from wherever it was kept. */
+        if (opcode != OP_ONCE)
+          {
+          if (private_data_ptr != 0)
+            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+          else
+            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+          }
+        else
+          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
+        }
+      compile_matchingpath(common, ccprev, cc, current);
+      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+        return;
+      }
+
+    /* Instructions after the current alternative is successfully matched. */
+    /* There is a similar code in compile_bracket_matchingpath. */
+    if (opcode == OP_ONCE)
+      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
+
+    /* First pass: count the stack slots this alternative has to save. */
+    stacksize = 0;
+    if (repeat_type == OP_MINUPTO)
+      {
+      /* We need to preserve the counter. TMP2 will be used below. */
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+      stacksize++;
+      }
+    if (ket != OP_KET || bra != OP_BRA)
+      stacksize++;
+    if (offset != 0)
+      {
+      if (common->capture_last_ptr != 0)
+        stacksize++;
+      if (common->optimized_cbracket[offset >> 1] == 0)
+        stacksize += 2;
+      }
+    if (opcode != OP_ONCE)
+      stacksize++;
+
+    if (stacksize > 0)
+      allocate_stack(common, stacksize);
+
+    /* Second pass: fill the slots in the same order; stacksize is now the index of the next slot. */
+    stacksize = 0;
+    if (repeat_type == OP_MINUPTO)
+      {
+      /* TMP2 was set above. */
+      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
+      stacksize++;
+      }
+
+    if (ket != OP_KET || bra != OP_BRA)
+      {
+      /* Repeating kets save STR_PTR; a non-repeating group with a zero prefix saves 0. */
+      if (ket != OP_KET)
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+      else
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+      stacksize++;
+      }
+
+    if (offset != 0)
+      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
+
+    if (opcode != OP_ONCE)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
+
+    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
+      {
+      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
+      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+      }
+
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
+
+    if (opcode != OP_ONCE)
+      {
+      /* Entry point of this alternative's backtracking code: either a table
+      entry (more than four alternatives) or the landing point of the
+      compare chain started above, which is extended here when needed. */
+      if (alt_max > 4)
+        add_label_addr(common);
+      else
+        {
+        if (alt_count != 2 * sizeof(sljit_uw))
+          {
+          JUMPHERE(alt1);
+          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
+            alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
+          }
+        else
+          {
+          JUMPHERE(alt2);
+          if (alt_max == 4)
+            alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
+          }
+        }
+      alt_count += sizeof(sljit_uw);
+      }
+
+    COMPILE_BACKTRACKINGPATH(current->top);
+    if (current->topbacktracks)
+      set_jumps(current->topbacktracks, LABEL());
+    SLJIT_ASSERT(!current->nextbacktracks);
+    }
+  while (*cc == OP_ALT);
+
+  if (cond != NULL)
+    {
+    /* Conditional group: for a negative assertion condition with a frame,
+    revert the frame before resolving the jump recorded in cond. */
+    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
+    assert = CURRENT_AS(bracket_backtrack)->u.assert;
+    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+      }
+    JUMPHERE(cond);
+    }
+
+  /* Free the STR_PTR. */
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  }
+
+if (offset != 0)
+  {
+  /* Using both temporary registers is better for instruction scheduling. */
+  if (common->optimized_cbracket[offset >> 1] != 0)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    free_stack(common, 2);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    }
+  }
+else if (opcode == OP_SBRA || opcode == OP_SCOND)
+  {
+  /* Write the value saved at STACK(0) back into the private data slot. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  }
+else if (opcode == OP_ONCE)
+  {
+  cc = ccbegin + GET(ccbegin, 1);
+  stacksize = needs_control_head ? 1 : 0;
+
+  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
+    {
+    /* Reset head and drop saved frame. */
+    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
+    }
+  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
+    {
+    /* The STR_PTR must be released. */
+    stacksize++;
+    }
+  free_stack(common, stacksize);
+
+  JUMPHERE(once); /* The jump recorded before the first alternative's backtracking code lands here. */
+  /* Restore previous private_data_ptr */
+  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
+  else if (ket == OP_KETRMIN)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    /* See the comment in the OP_KETRMIN tail below. */
+    free_stack(common, 2);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    }
+  }
+
+if (repeat_type == OP_EXACT)
+  {
+  /* Increment the counter and loop back to exact_label while it is not greater than repeat_count. */
+  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
+  CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
+  }
+else if (ket == OP_KETRMAX)
+  {
+  /* A non-zero value at STACK(0) resumes the recursive matching path. */
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  if (bra != OP_BRAZERO)
+    free_stack(common, 1);
+
+  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+  if (bra == OP_BRAZERO)
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
+    JUMPHERE(brazero);
+    free_stack(common, 1);
+    }
+  }
+else if (ket == OP_KETRMIN)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  /* OP_ONCE removes everything in case of a backtrack, so we don't
+  need to explicitly release the STR_PTR. The extra release would
+  badly affect the free_stack(common, 2) in the OP_ONCE clean-up above. */
+  if (opcode != OP_ONCE)
+    free_stack(common, 1);
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
+  if (opcode == OP_ONCE)
+    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
+  else if (bra == OP_BRAMINZERO)
+    free_stack(common, 1);
+  }
+else if (bra == OP_BRAZERO)
+  {
+  /* Non-repeating optional group: continue with the zero matching path. */
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
+  JUMPHERE(brazero);
+  }
+}
+
+/* Backtracking path of a possessive bracket (dispatched for OP_BRAPOS,
+OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS and OP_BRAPOSZERO). With a recorded frame
+(framesize >= 0) the frame is reverted and the private data slot is reloaded
+from it. Without a frame, the capture values of OP_CBRAPOS / OP_SCBRAPOS are
+written back from the stack and the slots counted in stacksize are released. */
+static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+bracketpos_backtrack *info = CURRENT_AS(bracketpos_backtrack);
+struct sljit_jump *skip_drop;
+BOOL restore_capture_last;
+int ovector_index;
+
+if (info->framesize >= 0)
+  {
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), info->private_data_ptr);
+  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+
+  if (current->topbacktracks)
+    {
+    /* The fall-through path jumps over the frame drop; only the pending
+    backtracks that land here drop the stack frame. */
+    skip_drop = JUMP(SLJIT_JUMP);
+    set_jumps(current->topbacktracks, LABEL());
+    free_stack(common, info->stacksize);
+    JUMPHERE(skip_drop);
+    }
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), info->private_data_ptr, SLJIT_MEM1(STACK_TOP), info->framesize * sizeof(sljit_sw));
+  return;
+  }
+
+if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
+  {
+  ovector_index = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
+  restore_capture_last = common->capture_last_ptr != 0;
+
+  /* STACK(0) and STACK(1) go back to the ovector pair; STACK(2) goes to the
+  capture_last slot when that slot is in use. TMP1 is reused for it once
+  its first value has been stored. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(ovector_index), TMP1, 0);
+  if (restore_capture_last)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(ovector_index + 1), TMP2, 0);
+  if (restore_capture_last)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
+  }
+
+set_jumps(current->topbacktracks, LABEL());
+free_stack(common, info->stacksize);
+}
+
+/* Backtracking path of OP_BRAMINZERO. The code of the group that follows the
+OP_BRAMINZERO opcode is generated here: for an assertion only its matching
+path is compiled (through a zeroed, local assert_backtrack record), for any
+other bracket both the matching path and the backtracking path are compiled.
+The list heads of the current record are cleared first and must be empty
+again afterwards. */
+static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+assert_backtrack assert_state;
+
+current->top = NULL;
+current->topbacktracks = NULL;
+current->nextbacktracks = NULL;
+
+if (current->cc[1] <= OP_ASSERTBACK_NOT)
+  {
+  /* Manual call of compile_assert_matchingpath. */
+  memset(&assert_state, 0, sizeof(assert_state));
+  assert_state.common.cc = current->cc;
+  assert_state.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
+  compile_assert_matchingpath(common, current->cc, &assert_state, FALSE);
+  }
+else
+  {
+  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
+  compile_bracket_matchingpath(common, current->cc, current);
+  compile_bracket_backtrackingpath(common, current->top);
+  }
+
+SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
+}
+
+/* Backtracking path of the control verbs dispatched here: OP_THEN,
+OP_THEN_ARG, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG.
+The checks are made in this order:
+  1. OP_THEN / OP_THEN_ARG with an active then trap: search the control
+     chain for the matching trap entry and jump to the trap's quit list.
+  2. OP_THEN / OP_THEN_ARG inside a positive assertion: jump to
+     positive_assert_quit.
+  3. common->local_exit set: jump to the quit label / quit list.
+  4. OP_SKIP_ARG: call do_search_mark and restart through reset_match when
+     its result is not -1.
+  5. Everything else: set STR_PTR (from STACK(0) for OP_SKIP, 0 otherwise)
+     and jump to reset_match. */
+static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+pcre_uchar verb = *current->cc;
+BOOL is_then = (verb == OP_THEN || verb == OP_THEN_ARG);
+struct sljit_label *scan_next;
+struct sljit_jump *enter_scan;
+
+if (is_then && common->then_trap != NULL)
+  {
+  SLJIT_ASSERT(common->control_head_ptr != 0);
+
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
+  enter_scan = JUMP(SLJIT_JUMP);
+
+  /* Emitted loop: follow the link stored one word below STACK_TOP until an
+  entry is found whose two preceding words equal TMP1 and TMP2. The first
+  entry is tested without following a link. */
+  scan_next = LABEL();
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
+  JUMPHERE(enter_scan);
+  CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, scan_next);
+  CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, scan_next);
+  add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
+  return;
+  }
+
+if (is_then && common->positive_assert)
+  {
+  add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
+  return;
+  }
+
+if (common->local_exit)
+  {
+  if (common->quit_label != NULL)
+    JUMPTO(SLJIT_JUMP, common->quit_label);
+  else
+    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+  return;
+  }
+
+switch (verb)
+  {
+  case OP_SKIP_ARG:
+  SLJIT_ASSERT(common->control_head_ptr != 0);
+  /* STACK_TOP is parked in LOCALS0 around the call because the register is
+  loaded with the address current->cc + 2 before do_search_mark is called. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
+  sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+  add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
+  return;
+
+  case OP_SKIP:
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  break;
+
+  default:
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
+  break;
+  }
+
+add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
+}
+
+/* Backtracking path of the virtual OP_THEN_TRAP opcode. When the record
+carries a then_trap pointer, that pointer only becomes the active trap again
+and no code is emitted. Otherwise two entry paths are emitted that both end
+by storing TMP1 into the control head slot: the fall-through path, which
+releases the trap's three slots plus its frame, and the path reached through
+the trap's quit list, which reverts the frame first when there is one. */
+static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+then_trap_backtrack *trap = CURRENT_AS(then_trap_backtrack);
+struct sljit_jump *join;
+int frame_slots;
+
+if (trap->then_trap)
+  {
+  common->then_trap = trap->then_trap;
+  return;
+  }
+
+/* A negative framesize means no frame, i.e. no extra slots. */
+frame_slots = trap->framesize > 0 ? trap->framesize : 0;
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(frame_slots));
+free_stack(common, frame_slots + 3);
+join = JUMP(SLJIT_JUMP);
+
+set_jumps(trap->quit, LABEL());
+/* STACK_TOP is set by THEN. */
+if (trap->framesize >= 0)
+  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+free_stack(common, 3);
+
+JUMPHERE(join);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
+}
+
+static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{ /* Emits the backtracking code for a chain of backtrack records.
+   Starting at `current`, every record is handled according to the opcode at
+   its cc pointer and the chain is followed through ->prev until it ends.
+   Before a record is handled, its pending nextbacktracks jumps are bound to
+   the current code position.
+   common->then_trap is saved on entry and restored on exit, because
+   compile_then_trap_backtrackingpath() may replace it while the chain is
+   processed.
+   common:  compiler state.
+   current: first record to process; may be NULL, in which case nothing is
+            emitted. */
+DEFINE_COMPILER;
+then_trap_backtrack *save_then_trap = common->then_trap;
+
+while (current)
+  {
+  if (current->nextbacktracks != NULL)
+    set_jumps(current->nextbacktracks, LABEL());
+  switch(*current->cc)
+    {
+    case OP_SET_SOM: /* Write the value saved at STACK(0) back to OVECTOR(0). */
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
+    break;
+
+    case OP_STAR: /* All single-item repeats and character classes share one handler. */
+    case OP_MINSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_EXACT:
+    case OP_POSSTAR:
+    case OP_POSPLUS:
+    case OP_POSQUERY:
+    case OP_POSUPTO:
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_EXACTI:
+    case OP_POSSTARI:
+    case OP_POSPLUSI:
+    case OP_POSQUERYI:
+    case OP_POSUPTOI:
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTEXACT:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSUPTO:
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTEXACTI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTPOSQUERYI:
+    case OP_NOTPOSUPTOI:
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEEXACT:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEPOSQUERY:
+    case OP_TYPEPOSUPTO:
+    case OP_CLASS:
+    case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS: /* Only compiled in with UTF support or for the non-8-bit libraries. */
+#endif
+    compile_iterator_backtrackingpath(common, current);
+    break;
+
+    case OP_REF: /* Back references, by number or by name (DN), with or without the I suffix. */
+    case OP_REFI:
+    case OP_DNREF:
+    case OP_DNREFI:
+    compile_ref_iterator_backtrackingpath(common, current);
+    break;
+
+    case OP_RECURSE:
+    compile_recurse_backtrackingpath(common, current);
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    compile_assert_backtrackingpath(common, current);
+    break;
+
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRA:
+    case OP_CBRA:
+    case OP_COND:
+    case OP_SBRA:
+    case OP_SCBRA:
+    case OP_SCOND:
+    compile_bracket_backtrackingpath(common, current);
+    break;
+
+    case OP_BRAZERO: /* The opcode after OP_BRAZERO decides: assertions sort at or below OP_ASSERTBACK_NOT. */
+    if (current->cc[1] > OP_ASSERTBACK_NOT)
+      compile_bracket_backtrackingpath(common, current);
+    else
+      compile_assert_backtrackingpath(common, current);
+    break;
+
+    case OP_BRAPOS:
+    case OP_CBRAPOS:
+    case OP_SBRAPOS:
+    case OP_SCBRAPOS:
+    case OP_BRAPOSZERO:
+    compile_bracketpos_backtrackingpath(common, current);
+    break;
+
+    case OP_BRAMINZERO:
+    compile_braminzero_backtrackingpath(common, current);
+    break;
+
+    case OP_MARK: /* Restore the mark slot; with has_skip_arg five slots were saved and STACK(0) also restores the control head. */
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
+    if (common->has_skip_arg)
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, common->has_skip_arg ? 5 : 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
+    if (common->has_skip_arg)
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
+    break;
+
+    case OP_THEN:
+    case OP_THEN_ARG:
+    case OP_PRUNE:
+    case OP_PRUNE_ARG:
+    case OP_SKIP:
+    case OP_SKIP_ARG:
+    compile_control_verb_backtrackingpath(common, current);
+    break;
+
+    case OP_COMMIT: /* Load PCRE_ERROR_NOMATCH into the return register (unless local_exit is set) and jump to quit. */
+    if (!common->local_exit)
+      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+    if (common->quit_label == NULL)
+      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+    else
+      JUMPTO(SLJIT_JUMP, common->quit_label);
+    break;
+
+    case OP_CALLOUT: /* These only need their pending backtracks bound to the current position. */
+    case OP_FAIL:
+    case OP_ACCEPT:
+    case OP_ASSERT_ACCEPT:
+    set_jumps(current->topbacktracks, LABEL());
+    break;
+
+    case OP_THEN_TRAP:
+    /* A virtual opcode for then traps. */
+    compile_then_trap_backtrackingpath(common, current);
+    break;
+
+    default: /* Every opcode that can own a backtrack record must be listed above. */
+    SLJIT_ASSERT_STOP();
+    break;
+    }
+  current = current->prev;
+  }
+common->then_trap = save_then_trap;
+}
+
+static SLJIT_INLINE void compile_recurse(compiler_common *common)
+{
+DEFINE_COMPILER;
+pcre_uchar *cc = common->start + common->currententry->start;
+pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
+pcre_uchar *ccend = bracketend(cc);
+BOOL needs_control_head;
+int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
+int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
+int alternativesize;
+BOOL needs_frame;
+backtrack_common altbacktrack;
+struct sljit_jump *jump;
+
+/* Recurse captures then. */
+common->then_trap = NULL;
+
+SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
+needs_frame = framesize >= 0;
+if (!needs_frame)
+  framesize = 0;
+alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
+
+SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
+common->currententry->entry = LABEL();
+set_jumps(common->currententry->calls, common->currententry->entry);
+
+sljit_emit_fast_enter(compiler, TMP2, 0);
+allocate_stack(common, private_data_size + framesize + alternativesize);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
+copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
+if (needs_control_head)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0);
+if (needs_frame)
+  init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
+
+if (alternativesize > 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+
+memset(&altbacktrack, 0, sizeof(backtrack_common));
+common->quit_label = NULL;
+common->accept_label = NULL;
+common->quit = NULL;
+common->accept = NULL;
+altbacktrack.cc = ccbegin;
+cc += GET(cc, 1);
+while (1)
+  {
+  altbacktrack.top = NULL;
+  altbacktrack.topbacktracks = NULL;
+
+  if (altbacktrack.cc != ccbegin)
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return;
+
+  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
+
+  compile_backtrackingpath(common, altbacktrack.top);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return;
+  set_jumps(altbacktrack.topbacktracks, LABEL());
+
+  if (*cc != OP_ALT)
+    break;
+
+  altbacktrack.cc = cc + 1 + LINK_SIZE;
+  cc += GET(cc, 1);
+  }
+
+/* None of them matched. */
+OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
+jump = JUMP(SLJIT_JUMP);
+
+if (common->quit != NULL)
+  {
+  set_jumps(common->quit, LABEL());
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
+  if (needs_frame)
+    {
+    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+    }
+  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
+  common->quit = NULL;
+  add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+  }
+
+set_jumps(common->accept, LABEL());
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
+if (needs_frame)
+  {
+  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+  }
+OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
+
+JUMPHERE(jump);
+if (common->quit != NULL)
+  set_jumps(common->quit, LABEL());
+copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
+free_stack(common, private_data_size + framesize + alternativesize);
+if (needs_control_head)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0);
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
+  }
+else
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0);
+  }
+sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
+}
+
+#undef COMPILE_BACKTRACKINGPATH
+#undef CURRENT_AS
+/* JIT-compiles the pattern `re` for one matching mode and stores the result
+ * in the executable_functions descriptor attached to `extra`.
+ *
+ *   re     compiled pattern; its bytecode is read starting right after the
+ *          name table
+ *   extra  must have PCRE_EXTRA_STUDY_DATA set (asserted below); on success
+ *          extra->executable_jit is created or reused and
+ *          PCRE_EXTRA_EXECUTABLE_JIT is set
+ *   mode   JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE or JIT_PARTIAL_HARD_COMPILE;
+ *          also the index of the descriptor slot that gets filled
+ *
+ * Returns nothing. On every failure path (unknown newline option, failed
+ * allocation, check_opcode_types() rejecting the pattern, private data larger
+ * than SLJIT_MAX_LOCAL_SIZE, sljit compiler error, no generated code) the
+ * buffers allocated here are released and `extra` is left unmodified.
+ */
+void
+PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
+{
+struct sljit_compiler *compiler;
+backtrack_common rootbacktrack;
+compiler_common common_data;
+compiler_common *common = &common_data;
+const pcre_uint8 *tables = re->tables;
+pcre_study_data *study;
+int private_data_size;
+pcre_uchar *ccend;
+executable_functions *functions;
+void *executable_func;
+sljit_uw executable_size;
+sljit_uw total_length;
+label_addr_list *label_addr;
+struct sljit_label *mainloop_label = NULL;
+struct sljit_label *continue_match_label;
+struct sljit_label *empty_match_found_label = NULL;
+struct sljit_label *empty_match_backtrack_label = NULL;
+struct sljit_label *reset_match_label;
+struct sljit_label *quit_label;
+struct sljit_jump *jump;
+struct sljit_jump *minlength_check_failed = NULL;
+struct sljit_jump *reqbyte_notfound = NULL;
+struct sljit_jump *empty_match = NULL;
+
+SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
+study = extra->study_data;
+
+if (!tables)
+  tables = PRIV(default_tables);
+
+memset(&rootbacktrack, 0, sizeof(backtrack_common));
+memset(common, 0, sizeof(compiler_common));
+rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
+/* The compiled bytecode begins right after the name table. */
+common->start = rootbacktrack.cc;
+common->read_only_data = NULL;
+common->read_only_data_size = 0;
+common->read_only_data_ptr = NULL;
+common->fcc = tables + fcc_offset;
+common->lcc = (sljit_sw)(tables + lcc_offset);
+common->mode = mode;
+common->might_be_empty = study->minlength == 0;
+common->nltype = NLTYPE_FIXED;
+switch(re->options & PCRE_NEWLINE_BITS)
+  {
+  case 0:
+  /* Compile-time default */
+  switch(NEWLINE)
+    {
+    case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
+    case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
+    default: common->newline = NEWLINE; break;
+    }
+  break;
+  case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
+  case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
+  case PCRE_NEWLINE_CR+
+       PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
+  case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
+  case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
+  default: return; /* Unrecognised newline option bits: give up, nothing allocated yet. */
+  }
+common->nlmax = READ_CHAR_MAX;
+common->nlmin = 0;
+if ((re->options & PCRE_BSR_ANYCRLF) != 0)
+  common->bsr_nltype = NLTYPE_ANYCRLF;
+else if ((re->options & PCRE_BSR_UNICODE) != 0)
+  common->bsr_nltype = NLTYPE_ANY;
+else
+  {
+#ifdef BSR_ANYCRLF
+  common->bsr_nltype = NLTYPE_ANYCRLF;
+#else
+  common->bsr_nltype = NLTYPE_ANY;
+#endif
+  }
+common->bsr_nlmax = READ_CHAR_MAX;
+common->bsr_nlmin = 0;
+common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+common->ctypes = (sljit_sw)(tables + ctypes_offset);
+common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
+common->name_count = re->name_count;
+common->name_entry_size = re->name_entry_size;
+common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
+#ifdef SUPPORT_UTF
+/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
+common->utf = (re->options & PCRE_UTF8) != 0;
+#ifdef SUPPORT_UCP
+common->use_ucp = (re->options & PCRE_UCP) != 0;
+#endif
+if (common->utf)
+  {
+  if (common->nltype == NLTYPE_ANY)
+    common->nlmax = 0x2029;
+  else if (common->nltype == NLTYPE_ANYCRLF)
+    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
+  else
+    {
+    /* We only care about the first newline character. */
+    common->nlmax = common->newline & 0xff;
+    }
+
+  if (common->nltype == NLTYPE_FIXED)
+    common->nlmin = common->newline & 0xff;
+  else
+    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
+
+  if (common->bsr_nltype == NLTYPE_ANY)
+    common->bsr_nlmax = 0x2029;
+  else
+    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
+  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
+  }
+#endif /* SUPPORT_UTF */
+ccend = bracketend(common->start);
+
+/* Calculate the local space size on the stack. */
+common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
+common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);
+if (!common->optimized_cbracket)
+  return;
+#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
+memset(common->optimized_cbracket, 0, re->top_bracket + 1);
+#else
+memset(common->optimized_cbracket, 1, re->top_bracket + 1);
+#endif
+
+SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
+#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
+common->capture_last_ptr = common->ovector_start;
+common->ovector_start += sizeof(sljit_sw);
+#endif
+if (!check_opcode_types(common, common->start, ccend))
+  {
+  SLJIT_FREE(common->optimized_cbracket);
+  return;
+  }
+
+/* Checking flags and updating ovector_start. */
+if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
+  {
+  common->req_char_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+if (mode != JIT_COMPILE)
+  {
+  common->start_used_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  if (mode == JIT_PARTIAL_SOFT_COMPILE)
+    {
+    common->hit_start = common->ovector_start;
+    common->ovector_start += 2 * sizeof(sljit_sw);
+    }
+  else
+    {
+    SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
+    common->needs_start_ptr = TRUE;
+    }
+  }
+if ((re->options & PCRE_FIRSTLINE) != 0)
+  {
+  common->first_line_end = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+common->control_head_ptr = 1;
+#endif
+if (common->control_head_ptr != 0)
+  {
+  common->control_head_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+if (common->needs_start_ptr && common->has_set_som)
+  {
+  /* Saving the real start pointer is necessary. */
+  common->start_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+else
+  common->needs_start_ptr = FALSE;
+
+/* Aligning ovector to even number of sljit words. */
+if ((common->ovector_start & sizeof(sljit_sw)) != 0)
+  common->ovector_start += sizeof(sljit_sw);
+
+if (common->start_ptr == 0)
+  common->start_ptr = OVECTOR(0);
+
+/* Capturing brackets cannot be optimized if callouts are allowed. */
+if (common->capture_last_ptr != 0)
+  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
+
+SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
+common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
+/* One sljit_si per pcre_uchar of bytecode; when common->has_then is set, one
+   extra byte per pcre_uchar follows the array and is used as then_offsets. */
+total_length = ccend - common->start;
+common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)));
+if (!common->private_data_ptrs)
+  {
+  SLJIT_FREE(common->optimized_cbracket);
+  return;
+  }
+memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
+
+private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
+set_private_data_ptrs(common, &private_data_size, ccend);
+if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
+  {
+  SLJIT_FREE(common->private_data_ptrs);
+  SLJIT_FREE(common->optimized_cbracket);
+  return;
+  }
+
+if (common->has_then)
+  {
+  common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
+  memset(common->then_offsets, 0, total_length);
+  set_then_offsets(common, common->start, NULL);
+  }
+
+if (common->read_only_data_size > 0)
+  {
+  common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
+  if (common->read_only_data == NULL)
+    {
+    SLJIT_FREE(common->optimized_cbracket);
+    SLJIT_FREE(common->private_data_ptrs);
+    return;
+    }
+  common->read_only_data_ptr = common->read_only_data;
+  }
+
+compiler = sljit_create_compiler();
+if (!compiler)
+  {
+  SLJIT_FREE(common->optimized_cbracket);
+  SLJIT_FREE(common->private_data_ptrs);
+  if (common->read_only_data)
+    SLJIT_FREE(common->read_only_data);
+  return;
+  }
+common->compiler = compiler;
+
+/* Main pcre_jit_exec entry. */
+sljit_emit_enter(compiler, 1, 5, 5, private_data_size);
+
+/* Register init. */
+reset_ovector(common, (re->top_bracket + 1) * 2);
+if (common->req_char_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0);
+
+OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
+OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0);
+
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
+if (common->control_head_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+
+/* Main part of the matching */
+if ((re->options & PCRE_ANCHORED) == 0)
+  {
+  mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+  continue_match_label = LABEL();
+  /* Forward search if possible. */
+  if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
+    {
+    if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
+      {
+      /* If read_only_data is reallocated, we might have an allocation failure. */
+      if (common->read_only_data_size > 0 && common->read_only_data == NULL)
+        {
+        sljit_free_compiler(compiler);
+        SLJIT_FREE(common->optimized_cbracket);
+        SLJIT_FREE(common->private_data_ptrs);
+        return;
+        }
+      }
+    else if ((re->flags & PCRE_FIRSTSET) != 0)
+      fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+    else if ((re->flags & PCRE_STARTLINE) != 0)
+      fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
+    else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
+      fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
+    }
+  }
+else
+  continue_match_label = LABEL();
+
+if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
+  {
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
+  minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0);
+  }
+if (common->req_char_ptr != 0)
+  reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
+
+/* Store the current STR_PTR in OVECTOR(0). */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
+/* Copy the limit of allowed recursions. */
+OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH);
+if (common->capture_last_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);
+
+if (common->needs_start_ptr)
+  {
+  SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0);
+  }
+else
+  SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
+
+/* Copy the beginning of the string. */
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+else if (mode == JIT_PARTIAL_HARD_COMPILE)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+
+compile_matchingpath(common, common->start, ccend, &rootbacktrack);
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  {
+  sljit_free_compiler(compiler);
+  SLJIT_FREE(common->optimized_cbracket);
+  SLJIT_FREE(common->private_data_ptrs);
+  if (common->read_only_data)
+    SLJIT_FREE(common->read_only_data);
+  return;
+  }
+
+if (common->might_be_empty)
+  {
+  empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+  empty_match_found_label = LABEL();
+  }
+
+common->accept_label = LABEL();
+if (common->accept != NULL)
+  set_jumps(common->accept, common->accept_label);
+
+/* This means we have a match. Update the ovector. */
+copy_ovector(common, re->top_bracket + 1);
+common->quit_label = common->forced_quit_label = LABEL();
+if (common->quit != NULL)
+  set_jumps(common->quit, common->quit_label);
+if (common->forced_quit != NULL)
+  set_jumps(common->forced_quit, common->forced_quit_label);
+if (minlength_check_failed != NULL)
+  SET_LABEL(minlength_check_failed, common->forced_quit_label);
+sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
+
+if (mode != JIT_COMPILE)
+  {
+  common->partialmatchlabel = LABEL();
+  set_jumps(common->partialmatch, common->partialmatchlabel);
+  return_with_partial_match(common, common->quit_label);
+  }
+
+if (common->might_be_empty)
+  empty_match_backtrack_label = LABEL();
+compile_backtrackingpath(common, rootbacktrack.top);
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  {
+  sljit_free_compiler(compiler);
+  SLJIT_FREE(common->optimized_cbracket);
+  SLJIT_FREE(common->private_data_ptrs);
+  if (common->read_only_data)
+    SLJIT_FREE(common->read_only_data);
+  return;
+  }
+
+SLJIT_ASSERT(rootbacktrack.prev == NULL);
+reset_match_label = LABEL();
+
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  /* Update hit_start only in the first time. */
+  jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
+  JUMPHERE(jump);
+  }
+
+/* Check we have remaining characters. */
+if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  }
+
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
+
+if ((re->options & PCRE_ANCHORED) == 0)
+  {
+  if (common->ff_newline_shortcut != NULL)
+    {
+    if ((re->options & PCRE_FIRSTLINE) == 0)
+      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
+    /* There cannot be more newlines here. */
+    }
+  else
+    {
+    if ((re->options & PCRE_FIRSTLINE) == 0)
+      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+    else
+      CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+    }
+  }
+
+/* No more remaining characters. */
+if (reqbyte_notfound != NULL)
+  JUMPHERE(reqbyte_notfound);
+
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
+
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+JUMPTO(SLJIT_JUMP, common->quit_label);
+
+flush_stubs(common);
+
+if (common->might_be_empty)
+  {
+  JUMPHERE(empty_match);
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
+  CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
+  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
+  }
+
+common->currententry = common->entries;
+common->local_exit = TRUE;
+quit_label = common->quit_label; /* Saved here; written back after the recurse entries are compiled. */
+while (common->currententry != NULL)
+  {
+  /* Might add new entries. */
+  compile_recurse(common);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    {
+    sljit_free_compiler(compiler);
+    SLJIT_FREE(common->optimized_cbracket);
+    SLJIT_FREE(common->private_data_ptrs);
+    if (common->read_only_data)
+      SLJIT_FREE(common->read_only_data);
+    return;
+    }
+  flush_stubs(common);
+  common->currententry = common->currententry->next;
+  }
+common->local_exit = FALSE;
+common->quit_label = quit_label;
+
+/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
+/* This is a (really) rare case. */
+set_jumps(common->stackalloc, LABEL());
+/* RETURN_ADDR is not a saved register. */
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
+OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
+
+sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
+jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
+OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
+sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+
+/* Allocation failed. */
+JUMPHERE(jump);
+/* We break the return address cache here, but this is a really rare case. */
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
+JUMPTO(SLJIT_JUMP, common->quit_label);
+
+/* Call limit reached. */
+set_jumps(common->calllimit, LABEL());
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
+JUMPTO(SLJIT_JUMP, common->quit_label);
+
+if (common->revertframes != NULL)
+  {
+  set_jumps(common->revertframes, LABEL());
+  do_revertframes(common);
+  }
+if (common->wordboundary != NULL)
+  {
+  set_jumps(common->wordboundary, LABEL());
+  check_wordboundary(common);
+  }
+if (common->anynewline != NULL)
+  {
+  set_jumps(common->anynewline, LABEL());
+  check_anynewline(common);
+  }
+if (common->hspace != NULL)
+  {
+  set_jumps(common->hspace, LABEL());
+  check_hspace(common);
+  }
+if (common->vspace != NULL)
+  {
+  set_jumps(common->vspace, LABEL());
+  check_vspace(common);
+  }
+if (common->casefulcmp != NULL)
+  {
+  set_jumps(common->casefulcmp, LABEL());
+  do_casefulcmp(common);
+  }
+if (common->caselesscmp != NULL)
+  {
+  set_jumps(common->caselesscmp, LABEL());
+  do_caselesscmp(common);
+  }
+if (common->reset_match != NULL)
+  {
+  set_jumps(common->reset_match, LABEL());
+  do_reset_match(common, (re->top_bracket + 1) * 2);
+  CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+  JUMPTO(SLJIT_JUMP, reset_match_label);
+  }
+#ifdef SUPPORT_UTF
+#ifdef COMPILE_PCRE8
+if (common->utfreadchar != NULL)
+  {
+  set_jumps(common->utfreadchar, LABEL());
+  do_utfreadchar(common);
+  }
+if (common->utfreadchar16 != NULL)
+  {
+  set_jumps(common->utfreadchar16, LABEL());
+  do_utfreadchar16(common);
+  }
+if (common->utfreadtype8 != NULL)
+  {
+  set_jumps(common->utfreadtype8, LABEL());
+  do_utfreadtype8(common);
+  }
+#endif /* COMPILE_PCRE8 */
+#endif /* SUPPORT_UTF */
+#ifdef SUPPORT_UCP
+if (common->getucd != NULL)
+  {
+  set_jumps(common->getucd, LABEL());
+  do_getucd(common);
+  }
+#endif
+/* Release the two work buffers before the machine code is generated. */
+SLJIT_ASSERT(common->read_only_data + (common->read_only_data_size >> SLJIT_WORD_SHIFT) == common->read_only_data_ptr);
+SLJIT_FREE(common->optimized_cbracket);
+SLJIT_FREE(common->private_data_ptrs);
+
+executable_func = sljit_generate_code(compiler);
+executable_size = sljit_get_generated_code_size(compiler);
+label_addr = common->label_addrs; /* Write each recorded label's address to the location stored with it. */
+while (label_addr != NULL)
+  {
+  *label_addr->addr = sljit_get_label_addr(label_addr->label);
+  label_addr = label_addr->next;
+  }
+sljit_free_compiler(compiler);
+if (executable_func == NULL)
+  {
+  if (common->read_only_data)
+    SLJIT_FREE(common->read_only_data);
+  return;
+  }
+
+/* Reuse the function descriptor if possible. */
+if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
+  functions = (executable_functions *)extra->executable_jit;
+else
+  {
+  /* Note: If your memory-checker has flagged the allocation below as a
+   * memory leak, it is probably because you either forgot to call
+   * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
+   * pcre16_extra) object, or you called said function after having
+   * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
+   * of the object. (The function will only free the JIT data if the
+   * bit remains set, as the bit indicates that the pointer to the data
+   * is valid.)
+   */
+  functions = SLJIT_MALLOC(sizeof(executable_functions));
+  if (functions == NULL)
+    {
+    /* This case is highly unlikely since we just recently
+    freed a lot of memory. Not impossible though. */
+    sljit_free_code(executable_func);
+    if (common->read_only_data)
+      SLJIT_FREE(common->read_only_data);
+    return;
+    }
+  memset(functions, 0, sizeof(executable_functions));
+  functions->top_bracket = (re->top_bracket + 1) * 2;
+  functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
+  extra->executable_jit = functions;
+  extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
+  }
+/* Store the results in the descriptor slot that belongs to this mode. */
+functions->executable_funcs[mode] = executable_func;
+functions->read_only_data[mode] = common->read_only_data;
+functions->executable_sizes[mode] = executable_size;
+}
+
+/* Runs JIT-compiled code on a temporary stack of MACHINE_STACK_SIZE bytes
+ * that lives in this function's own frame.
+ *
+ *   arguments        match arguments; arguments->stack is overwritten to
+ *                    point at the temporary stack
+ *   executable_func  entry point of the generated code
+ *
+ * Returns whatever the generated code returns.
+ */
+static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func)
+{
+pcre_uint8 scratch[MACHINE_STACK_SIZE];
+struct sljit_stack machine_stack;
+/* The union converts the data pointer into a callable function pointer. */
+union {
+   void* as_pointer;
+   jit_function as_function;
+} entry;
+
+entry.as_pointer = executable_func;
+
+/* The stack starts empty (top == base) and cannot grow past the buffer. */
+machine_stack.top = (sljit_sw)&scratch;
+machine_stack.base = machine_stack.top;
+machine_stack.max_limit = machine_stack.limit = machine_stack.base + MACHINE_STACK_SIZE;
+
+arguments->stack = &machine_stack;
+return entry.as_function(arguments);
+}
+
+/* Executes the JIT-compiled code that matches the requested mode.
+ *
+ *   extra_data    carries the executable_functions descriptor, plus the
+ *                 optional match limit, callout data and mark pointer
+ *   subject       start of the subject string
+ *   length        subject length, in pcre_uchar units
+ *   start_offset  offset in the subject where matching begins
+ *   options       match-time option bits
+ *   offsets       output vector
+ *   offset_count  number of ints in `offsets`
+ *
+ * Returns the value produced by the generated code, replaced by 0 when that
+ * value times two exceeds the usable part of the output vector, or
+ * PCRE_ERROR_JIT_BADOPTION when no code exists for the selected mode.
+ */
+int
+PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
+  int length, int start_offset, int options, int *offsets, int offset_count)
+{
+executable_functions *functions = (executable_functions *)extra_data->executable_jit;
+union {
+   void* as_pointer;
+   jit_function as_function;
+} entry;
+jit_arguments args;
+int bracket_slots;
+int usable_count;
+int rc;
+int mode;
+
+/* Hard partial matching takes precedence over soft partial matching. */
+if ((options & PCRE_PARTIAL_HARD) != 0)
+  mode = JIT_PARTIAL_HARD_COMPILE;
+else
+  mode = ((options & PCRE_PARTIAL_SOFT) != 0) ? JIT_PARTIAL_SOFT_COMPILE : JIT_COMPILE;
+
+entry.as_pointer = functions->executable_funcs[mode];
+if (entry.as_pointer == NULL)
+  return PCRE_ERROR_JIT_BADOPTION;
+
+/* Sanity checks should be handled by pcre_exec. */
+args.begin = subject;
+args.str = subject + start_offset;
+args.end = subject + length;
+args.mark_ptr = NULL;
+args.offsets = offsets;
+args.real_offset_count = offset_count;
+
+args.notbol = (options & PCRE_NOTBOL) != 0;
+args.noteol = (options & PCRE_NOTEOL) != 0;
+args.notempty = (options & PCRE_NOTEMPTY) != 0;
+args.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
+
+args.callout_data = NULL;
+if ((extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
+  args.callout_data = extra_data->callout_data;
+
+/* JIT decreases this value less frequently than the interpreter. The limit
+stored with the compiled code applies when it is non-zero and smaller. */
+args.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) != 0) ? (pcre_uint32)(extra_data->match_limit) : MATCH_LIMIT;
+if (functions->limit_match != 0 && functions->limit_match < args.limit_match)
+  args.limit_match = functions->limit_match;
+
+/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3
+of the output vector for captured strings; the rest is workspace that is not
+needed here. The same limit is applied so that the user gets the same result
+with and without JIT. */
+
+usable_count = offset_count;
+if (usable_count != 2)
+  usable_count = ((usable_count - (usable_count % 3)) * 2) / 3;
+bracket_slots = functions->top_bracket;
+if (usable_count > bracket_slots)
+  usable_count = bracket_slots;
+args.offset_count = usable_count;
+
+/* A registered callback supplies the stack; otherwise userdata is the stack. */
+if (functions->callback)
+  args.stack = (struct sljit_stack *)functions->callback(functions->userdata);
+else
+  args.stack = (struct sljit_stack *)functions->userdata;
+
+/* Without a caller-provided stack, run on a temporary machine-stack buffer. */
+if (args.stack == NULL)
+  rc = jit_machine_stack_exec(&args, entry.as_pointer);
+else
+  rc = entry.as_function(&args);
+
+if (rc * 2 > usable_count)
+  rc = 0;
+if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
+  *(extra_data->mark) = args.mark_ptr;
+
+return rc;
+}
+
+/* Public fast-path entry: runs the JIT-compiled code directly on the stack
+ * supplied by the caller.
+ *
+ *   argument_re   unused
+ *   extra_data    carries the executable_functions descriptor, plus the
+ *                 optional match limit, callout data and mark pointer
+ *   subject       start of the subject string
+ *   length        subject length, in code units
+ *   start_offset  offset in the subject where matching begins
+ *   options       match-time options; bits outside PUBLIC_JIT_EXEC_OPTIONS
+ *                 are rejected
+ *   offsets       output vector
+ *   offset_count  number of ints in `offsets`
+ *   stack         JIT stack handed to the generated code as-is
+ *
+ * Returns the value produced by the generated code (replaced by 0 when that
+ * value times two exceeds the usable part of the output vector), or
+ * PCRE_ERROR_JIT_BADOPTION when an option is not allowed, when extra_data
+ * has no JIT descriptor, or when no code exists for the selected mode.
+ */
+#if defined COMPILE_PCRE8
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
+  PCRE_SPTR subject, int length, int start_offset, int options,
+  int *offsets, int offset_count, pcre_jit_stack *stack)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
+  PCRE_SPTR16 subject, int length, int start_offset, int options,
+  int *offsets, int offset_count, pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+  PCRE_SPTR32 subject, int length, int start_offset, int options,
+  int *offsets, int offset_count, pcre32_jit_stack *stack)
+#endif
+{
+pcre_uchar *subject_ptr = (pcre_uchar *)subject;
+executable_functions *functions = (executable_functions *)extra_data->executable_jit;
+union {
+   void* executable_func;
+   jit_function call_executable_func;
+} convert_executable_func;
+jit_arguments arguments;
+int max_offset_count;
+int retval;
+int mode = JIT_COMPILE;
+
+SLJIT_UNUSED_ARG(argument_re);
+
+/* Plausibility checks */
+if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
+
+/* Hard partial matching takes precedence over soft partial matching. */
+if ((options & PCRE_PARTIAL_HARD) != 0)
+  mode = JIT_PARTIAL_HARD_COMPILE;
+else if ((options & PCRE_PARTIAL_SOFT) != 0)
+  mode = JIT_PARTIAL_SOFT_COMPILE;
+
+/* This is a public entry point: a pattern that was never JIT-compiled has no
+descriptor, so report that instead of dereferencing a NULL pointer. */
+if (functions == NULL || functions->executable_funcs[mode] == NULL)
+  return PCRE_ERROR_JIT_BADOPTION;
+
+/* Sanity checks should be handled by pcre_exec. */
+arguments.stack = (struct sljit_stack *)stack;
+arguments.str = subject_ptr + start_offset;
+arguments.begin = subject_ptr;
+arguments.end = subject_ptr + length;
+arguments.mark_ptr = NULL;
+/* JIT decreases this value less frequently than the interpreter. */
+arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
+/* The limit stored with the compiled code applies when it is non-zero and smaller. */
+if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
+  arguments.limit_match = functions->limit_match;
+arguments.notbol = (options & PCRE_NOTBOL) != 0;
+arguments.noteol = (options & PCRE_NOTEOL) != 0;
+arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
+arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
+arguments.offsets = offsets;
+arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
+arguments.real_offset_count = offset_count;
+
+/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
+the output vector for storing captured strings, with the remainder used as
+workspace. We don't need the workspace here. For compatibility, we limit the
+number of captured strings in the same way as pcre_exec(), so that the user
+gets the same result with and without JIT. */
+
+if (offset_count != 2)
+  offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
+max_offset_count = functions->top_bracket;
+if (offset_count > max_offset_count)
+  offset_count = max_offset_count;
+arguments.offset_count = offset_count;
+
+/* The union converts the stored data pointer into a callable function pointer. */
+convert_executable_func.executable_func = functions->executable_funcs[mode];
+retval = convert_executable_func.call_executable_func(&arguments);
+
+if (retval * 2 > offset_count)
+  retval = 0;
+if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
+  *(extra_data->mark) = arguments.mark_ptr;
+
+return retval;
+}
+
+/* Release everything owned by a JIT function table: for each compile mode the
+generated code and the read-only data (each only when present), and finally
+the table itself. */
+
+void
+PRIV(jit_free)(void *executable_funcs)
+{
+executable_functions *table = (executable_functions *)executable_funcs;
+int mode = 0;
+
+while (mode < JIT_NUMBER_OF_COMPILE_MODES)
+  {
+  if (table->executable_funcs[mode] != NULL)
+    sljit_free_code(table->executable_funcs[mode]);
+  if (table->read_only_data[mode] != NULL)
+    SLJIT_FREE(table->read_only_data[mode]);
+  mode++;
+  }
+
+SLJIT_FREE(table);
+}
+
+/* Return the sum of the per-mode entries of executable_sizes, narrowed to int
+for the caller. */
+
+int
+PRIV(jit_get_size)(void *executable_funcs)
+{
+executable_functions *table = (executable_functions *)executable_funcs;
+sljit_uw total = 0;
+int mode = JIT_NUMBER_OF_COMPILE_MODES;
+
+/* Unsigned addition is order independent, so walking downwards is fine. */
+while (mode-- > 0)
+  total += table->executable_sizes[mode];
+
+return (int)total;
+}
+
+/* Return the platform name string reported by sljit. */
+
+const char*
+PRIV(jit_get_target)(void)
+{
+const char *platform_name = sljit_get_platform_name();
+return platform_name;
+}
+
+/* Create a JIT stack.
+
+Arguments:
+  startsize   requested initial size; must be at least 1
+  maxsize     requested maximum size; must be at least 1
+
+Returns:      the result of sljit_allocate_stack(), or NULL when either
+              argument is smaller than 1
+
+A start size larger than the maximum is reduced to the maximum. Both values
+are then rounded up to a multiple of STACK_GROWTH_RATE (the mask idiom used
+here assumes that STACK_GROWTH_RATE is a power of two).
+
+The rounding is done in sljit_uw (unsigned) arithmetic. Adding
+STACK_GROWTH_RATE - 1 to an int that is close to INT_MAX would overflow, and
+signed overflow is undefined behaviour in C. */
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL pcre_jit_stack *
+pcre_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL pcre16_jit_stack *
+pcre16_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL pcre32_jit_stack *
+pcre32_jit_stack_alloc(int startsize, int maxsize)
+#endif
+{
+sljit_uw rounded_start;
+sljit_uw rounded_max;
+
+if (startsize < 1 || maxsize < 1)
+  return NULL;
+if (startsize > maxsize)
+  startsize = maxsize;
+
+/* Both values are known to be positive here, so the conversions are exact. */
+rounded_start = ((sljit_uw)startsize + STACK_GROWTH_RATE - 1) &
+  ~(sljit_uw)(STACK_GROWTH_RATE - 1);
+rounded_max = ((sljit_uw)maxsize + STACK_GROWTH_RATE - 1) &
+  ~(sljit_uw)(STACK_GROWTH_RATE - 1);
+
+return (PUBL(jit_stack)*)sljit_allocate_stack(rounded_start, rounded_max);
+}
+
+/* Free a JIT stack by handing it to sljit_free_stack(). */
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_stack_free(pcre_jit_stack *stack)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_stack_free(pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_stack_free(pcre32_jit_stack *stack)
+#endif
+{
+struct sljit_stack *jit_stack = (struct sljit_stack *)stack;
+sljit_free_stack(jit_stack);
+}
+
+/* Record a stack callback and its user data in the JIT function table that is
+attached to an extra block. Nothing happens when extra is NULL, when the
+PCRE_EXTRA_EXECUTABLE_JIT flag is not set, or when no table is attached. */
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
+#endif
+{
+executable_functions *table;
+
+if (extra == NULL)
+  return;
+if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
+  return;
+if (extra->executable_jit == NULL)
+  return;
+
+table = (executable_functions *)extra->executable_jit;
+table->userdata = userdata;
+table->callback = callback;
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_free_unused_memory(void)
+#endif
+{
+sljit_free_unused_memory_exec();  /* The whole job is delegated to sljit */
+}
+
+#else  /* SUPPORT_JIT */
+
+/* These are dummy functions to avoid linking errors when JIT support is not
+being compiled. */
+
+/* Stub used when JIT support is not compiled: no stack is ever created, so
+the result is always NULL. */
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL pcre_jit_stack *
+pcre_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL pcre16_jit_stack *
+pcre16_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL pcre32_jit_stack *
+pcre32_jit_stack_alloc(int startsize, int maxsize)
+#endif
+{
+/* The casts only mark the arguments as used. */
+(void)maxsize;
+(void)startsize;
+
+return NULL;
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_stack_free(pcre_jit_stack *stack)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_stack_free(pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_stack_free(pcre32_jit_stack *stack)
+#endif
+{
+(void)stack;  /* No-JIT stub: nothing to free; the cast marks the argument as used */
+}
+
+/* Stub used when JIT support is not compiled: the request is ignored. */
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
+#endif
+{
+/* The casts only mark the arguments as used. */
+(void)userdata;
+(void)callback;
+(void)extra;
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_free_unused_memory(void)
+#endif
+{  /* Intentionally empty: this is the stub used when JIT is not compiled */
+}
+
+#endif
+
+/* End of pcre_jit_compile.c */
index a44a6eaa905b71567c8845c85185165d7e165907..209cae9024115cdee36808e44450ed7823f2b709 100644 (file)
@@ -45,9 +45,7 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
 
 
 #ifndef DFTABLES
-#  ifdef HAVE_CONFIG_H
 #  include "config.h"
-#  endif
 #  include "pcre_internal.h"
 #endif
 
index b8f5a4de19c8663f2cf40f39e1eb71de9eb1e2f3..405b91337592c0b3366308396b9effe3e92d9362 100644 (file)
@@ -47,9 +47,7 @@ and NLTYPE_ANY. The full list of Unicode newline characters is taken from
 http://unicode.org/unicode/reports/tr18/. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index 95f1beb963e46b65ab6118144aded7339df70963..ba3718612fb0e32c7de8b0053e00dd324deb4e56 100644 (file)
@@ -41,9 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* This file contains a private PCRE function that converts an ordinal
 character value into a UTF8 string. */
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #define COMPILE_PCRE8
 
diff --git a/ext/pcre/pcrelib/pcre_printint.c b/ext/pcre/pcrelib/pcre_printint.c
new file mode 100644 (file)
index 0000000..8cbb161
--- /dev/null
@@ -0,0 +1,832 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2012 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains a PCRE private debugging function for printing out the
+internal form of a compiled regular expression, along with some supporting
+local functions. This source file is used in two places:
+
+(1) It is #included by pcre_compile.c when it is compiled in debugging mode
+(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
+compiles. In this case PCRE_INCLUDED is defined.
+
+(2) It is also compiled separately and linked with pcretest.c, which can be
+asked to print out a compiled regex for debugging purposes. */
+
+#ifndef PCRE_INCLUDED
+
+#include "config.h"
+
+/* For pcretest program. */
+#define PRIV(name) name
+
+/* We have to include pcre_internal.h because we need the internal info for
+displaying the results of pcre_study() and we also need to know about the
+internal macros, structures, and other internal data values; pcretest has
+"inside information" compared to a program that strictly follows the PCRE API.
+
+Although pcre_internal.h does itself include pcre.h, we explicitly include it
+here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
+appropriately for an application, not for building PCRE. */
+
+#include "pcre.h"
+#include "pcre_internal.h"
+
+/* These are the functions that are contained within. It doesn't seem worth
+having a separate .h file just for this. */
+
+#endif /* PCRE_INCLUDED */
+
+#ifdef PCRE_INCLUDED
+static /* Keep the following function as private. */
+#endif
+
+#if defined COMPILE_PCRE8
+void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#elif defined COMPILE_PCRE16
+void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#elif defined COMPILE_PCRE32
+void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#endif
+
+/* Macro that decides whether a character should be output as a literal or in
+hexadecimal. We don't use isprint() because that can vary from system to system
+(even without the use of locales) and we want the output always to be the same,
+for testing purposes. */
+
+#ifdef EBCDIC
+#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
+#else
+#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
+#endif
+
+/* The table of operator names. */
+
+static const char *priv_OP_names[] = { OP_NAME_LIST };
+
+/* This table of operator lengths is not actually used by the working code,
+but its size is needed for a check that ensures it is the correct size for the
+number of opcodes (thus catching update omissions). */
+
+static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
+
+
+
+/*************************************************
+*       Print single- or multi-byte character    *
+*************************************************/
+
+static unsigned int
+print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
+{
+pcre_uint32 c = *ptr;  /* First code unit of the character to be printed. The
+function writes one character to f, literally when PRINTABLE and as a hex
+escape otherwise, and returns the number of further code units it consumed
+after the first one; callers add that to their own step of one. */
+
+#ifndef SUPPORT_UTF
+
+(void)utf;  /* Avoid compiler warning */
+if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
+else if (c <= 0x80) fprintf(f, "\\x%02x", c);
+else fprintf(f, "\\x{%x}", c);
+return 0;
+
+#else
+
+#if defined COMPILE_PCRE8
+
+if (!utf || (c & 0xc0) != 0xc0)  /* Not a multi-byte lead byte: one unit only */
+  {
+  if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
+  else if (c < 0x80) fprintf(f, "\\x%02x", c);
+  else fprintf(f, "\\x{%02x}", c);
+  return 0;
+  }
+else
+  {
+  int i;
+  int a = PRIV(utf8_table4)[c & 0x3f];  /* Number of additional bytes */
+  int s = 6*a;  /* Shift for the lead byte; each following byte adds 6 bits */
+  c = (c & PRIV(utf8_table3)[a]) << s;
+  for (i = 1; i <= a; i++)
+    {
+    /* This is a check for malformed UTF-8; it should only occur if the sanity
+    check has been turned off. Rather than swallow random bytes, just stop if
+    we hit a bad one. Print it with \X instead of \x as an indication. */
+
+    if ((ptr[i] & 0xc0) != 0x80)
+      {
+      fprintf(f, "\\X{%x}", c);
+      return i - 1;  /* Only the continuation bytes accepted so far */
+      }
+
+    /* The byte is OK */
+
+    s -= 6;
+    c |= (ptr[i] & 0x3f) << s;
+    }
+  fprintf(f, "\\x{%x}", c);
+  return a;
+  }
+
+#elif defined COMPILE_PCRE16
+
+if (!utf || (c & 0xfc00) != 0xd800)  /* Not in 0xd800-0xdbff: one unit only */
+  {
+  if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
+  else if (c <= 0x80) fprintf(f, "\\x%02x", c);
+  else fprintf(f, "\\x{%02x}", c);
+  return 0;
+  }
+else
+  {
+  /* This is a check for malformed UTF-16; it should only occur if the sanity
+  check has been turned off. Rather than swallow a low surrogate, just stop if
+  we hit a bad one. Print it with \X instead of \x as an indication. */
+
+  if ((ptr[1] & 0xfc00) != 0xdc00)
+    {
+    fprintf(f, "\\X{%x}", c);
+    return 0;
+    }
+
+  c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;  /* Join the pair */
+  fprintf(f, "\\x{%x}", c);
+  return 1;
+  }
+
+#elif defined COMPILE_PCRE32
+
+if (!utf || (c & 0xfffff800u) != 0xd800u)  /* Outside 0xd800-0xdfff */
+  {
+  if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
+  else if (c <= 0x80) fprintf(f, "\\x%02x", c);
+  else fprintf(f, "\\x{%x}", c);
+  return 0;
+  }
+else
+  {
+  /* This is a check for malformed UTF-32; it should only occur if the sanity
+  check has been turned off. Rather than swallow a surrogate, just stop if
+  we hit one. Print it with \X instead of \x as an indication. */
+  fprintf(f, "\\X{%x}", c);
+  return 0;
+  }
+
+#endif /* COMPILE_PCRE[8|16|32] */
+
+#endif /* SUPPORT_UTF */
+}
+
+/*************************************************
+*  Print uchar string (regardless of utf)        *
+*************************************************/
+
+/* Write a zero-terminated string of code units to f, one unit at a time, with
+no UTF decoding: PRINTABLE units are written literally, all others as \x{..}
+escapes. */
+
+static void
+print_puchar(FILE *f, PCRE_PUCHAR ptr)
+{
+pcre_uint32 c;
+
+for (c = *ptr; c != '\0'; c = *(++ptr))
+  {
+  if (PRINTABLE(c))
+    fprintf(f, "%c", c);
+  else
+    fprintf(f, "\\x{%x}", c);
+  }
+}
+
+/*************************************************
+*          Find Unicode property name            *
+*************************************************/
+
+/* Look up the name of a Unicode property from its type and value. With UCP
+support the utt table is searched from its last entry down to its first, and
+"??" is returned when nothing matches; without UCP support the answer is
+always "??". */
+
+static const char *
+get_ucpname(unsigned int ptype, unsigned int pvalue)
+{
+#ifdef SUPPORT_UCP
+int idx = PRIV(utt_size);
+
+while (--idx >= 0)
+  {
+  if (PRIV(utt)[idx].type == ptype && PRIV(utt)[idx].value == pvalue)
+    return PRIV(utt_names) + PRIV(utt)[idx].name_offset;
+  }
+return "??";
+#else
+/* It gets harder and harder to shut off unwanted compiler warnings. */
+ptype = ptype * pvalue;
+return (ptype == pvalue)? "??" : "??";
+#endif
+}
+
+
+/*************************************************
+*       Print Unicode property value             *
+*************************************************/
+
+/* "Normal" properties can be printed from tables. The PT_CLIST property is a
+pseudo-property that contains a pointer to a list of case-equivalent
+characters. This is used only when UCP support is available and UTF mode is
+selected. It should never occur otherwise, but just in case it does, have
+something ready to print. */
+
+/* Print one OP_PROP/OP_NOTPROP item, wrapped in the given before and after
+strings. code[1] holds the property type and code[2] its value. */
+
+static void
+print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after)
+{
+const char *negation;
+#ifdef SUPPORT_UCP
+const pcre_uint32 *set;
+#endif
+
+/* The usual case: the name comes straight from the tables. */
+
+if (code[1] != PT_CLIST)
+  {
+  const char *propname = get_ucpname(code[1], code[2]);
+  fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], propname, after);
+  return;
+  }
+
+/* PT_CLIST: list the case-equivalent characters when they are available,
+otherwise just show the raw value. */
+
+negation = (*code == OP_PROP)? "" : "not ";
+#ifdef SUPPORT_UCP
+set = PRIV(ucd_caseless_sets) + code[2];
+fprintf (f, "%s%sclist", before, negation);
+for (; *set < NOTACHAR; set++) fprintf(f, " %04x", *set);
+fprintf(f, "%s", after);
+#else
+fprintf(f, "%s%sclist %d%s", before, negation, code[2], after);
+#endif
+}
+
+
+
+
+/*************************************************
+*         Print compiled regex                   *
+*************************************************/
+
+/* Make this function work for a regex with integers in either byte order.
+However, we assume that what we are passed is a compiled regex. The
+print_lengths flag controls whether offsets and lengths of items are printed.
+They can be turned off from pcretest so that automatic tests on bytecode can be
+written that do not depend on the value of LINK_SIZE. */
+
+#ifdef PCRE_INCLUDED
+static /* Keep the following function as private. */
+#endif
+#if defined COMPILE_PCRE8
+void
+pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
+#elif defined COMPILE_PCRE16
+void
+pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
+#elif defined COMPILE_PCRE32
+void
+pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
+#endif
+{
+REAL_PCRE *re = (REAL_PCRE *)external_re;
+pcre_uchar *codestart, *code;
+BOOL utf;
+
+unsigned int options = re->options;
+int offset = re->name_table_offset;
+int count = re->name_count;
+int size = re->name_entry_size;
+
+if (re->magic_number != MAGIC_NUMBER)  /* Taken as opposite byte order: swap */
+  {
+  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
+  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
+  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
+  options = ((options << 24) & 0xff000000) |
+            ((options <<  8) & 0x00ff0000) |
+            ((options >>  8) & 0x0000ff00) |
+            ((options >> 24) & 0x000000ff);
+  }
+
+code = codestart = (pcre_uchar *)re + offset + count * size;  /* Past name table */
+/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
+utf = (options & PCRE_UTF8) != 0;
+
+for(;;)  /* One output line per item; the only exit is the return at OP_END */
+  {
+  pcre_uchar *ccode;
+  const char *flag = "  ";
+  pcre_uint32 c;
+  unsigned int extra = 0;  /* Units to skip in addition to the table length */
+
+  if (print_lengths)
+    fprintf(f, "%3d ", (int)(code - codestart));
+  else
+    fprintf(f, "    ");
+
+  switch(*code)
+    {
+/* ========================================================================== */
+      /* These cases are never obeyed. This is a fudge that causes a compile-
+      time error if the vectors OP_names or OP_lengths, which are indexed
+      by opcode, are not the correct length. It seems to be the only way to do
+      such a check at compile time, as the sizeof() operator does not work in
+      the C preprocessor. */
+
+      case OP_TABLE_LENGTH:
+      case OP_TABLE_LENGTH +
+        ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
+        (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)):
+      break;
+/* ========================================================================== */
+
+    case OP_END:
+    fprintf(f, "    %s\n", priv_OP_names[*code]);
+    fprintf(f, "------------------------------------------------------------------\n");
+    return;
+
+    case OP_CHAR:
+    fprintf(f, "    ");
+    do
+      {
+      code++;
+      code += 1 + print_char(f, code, utf);
+      }
+    while (*code == OP_CHAR);  /* Consecutive literals share one output line */
+    fprintf(f, "\n");
+    continue;
+
+    case OP_CHARI:
+    fprintf(f, " /i ");
+    do
+      {
+      code++;
+      code += 1 + print_char(f, code, utf);
+      }
+    while (*code == OP_CHARI);
+    fprintf(f, "\n");
+    continue;
+
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRA:
+    case OP_SCBRAPOS:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE));
+    break;
+
+    case OP_BRA:
+    case OP_BRAPOS:
+    case OP_SBRA:
+    case OP_SBRAPOS:
+    case OP_KETRMAX:
+    case OP_KETRMIN:
+    case OP_KETRPOS:
+    case OP_ALT:
+    case OP_KET:
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_COND:
+    case OP_SCOND:
+    case OP_REVERSE:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s", priv_OP_names[*code]);
+    break;
+
+    case OP_CLOSE:
+    fprintf(f, "    %s %d", priv_OP_names[*code], GET2(code, 1));
+    break;
+
+    case OP_CREF:
+    fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
+    break;
+
+    case OP_DNCREF:
+      {
+      pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
+        IMM2_SIZE;
+      fprintf(f, " %s Cond ref <", flag);
+      print_puchar(f, entry);
+      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+      }
+    break;
+
+    case OP_RREF:
+    c = GET2(code, 1);
+    if (c == RREF_ANY)
+      fprintf(f, "    Cond recurse any");
+    else
+      fprintf(f, "    Cond recurse %d", c);
+    break;
+
+    case OP_DNRREF:
+      {
+      pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
+        IMM2_SIZE;
+      fprintf(f, " %s Cond recurse <", flag);
+      print_puchar(f, entry);
+      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+      }
+    break;
+
+    case OP_DEF:
+    fprintf(f, "    Cond def");
+    break;
+
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_POSSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_POSPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_POSQUERYI:
+    flag = "/i";
+    /* Fall through */
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_POSSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_POSPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_POSQUERY:
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEPOSQUERY:
+    fprintf(f, " %s ", flag);
+    if (*code >= OP_TYPESTAR)
+      {
+      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
+        {
+        print_prop(f, code + 1, "", " ");
+        extra = 2;  /* The property type and value units */
+        }
+      else fprintf(f, "%s", priv_OP_names[code[1]]);
+      }
+    else extra = print_char(f, code+1, utf);
+    fprintf(f, "%s", priv_OP_names[*code]);
+    break;
+
+    case OP_EXACTI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_POSUPTOI:
+    flag = "/i";
+    /* Fall through */
+    case OP_EXACT:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_POSUPTO:
+    fprintf(f, " %s ", flag);
+    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
+    fprintf(f, "{");
+    if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
+      else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
+    break;
+
+    case OP_TYPEEXACT:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEPOSUPTO:
+    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+      {
+      print_prop(f, code + IMM2_SIZE + 1, "    ", " ");
+      extra = 2;  /* The property type and value units */
+      }
+    else fprintf(f, "    %s", priv_OP_names[code[1 + IMM2_SIZE]]);
+    fprintf(f, "{");
+    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
+      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
+    break;
+
+    case OP_NOTI:
+    flag = "/i";
+    /* Fall through */
+    case OP_NOT:
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1, utf);
+    fprintf(f, "]");
+    break;
+
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTPOSQUERYI:
+    flag = "/i";
+    /* Fall through */
+
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTPOSPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTPOSQUERY:
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1, utf);
+    fprintf(f, "]%s", priv_OP_names[*code]);
+    break;
+
+    case OP_NOTEXACTI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTPOSUPTOI:
+    flag = "/i";
+    /* Fall through */
+
+    case OP_NOTEXACT:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTPOSUPTO:
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
+    fprintf(f, "]{");
+    if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
+      else
+    if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
+    break;
+
+    case OP_RECURSE:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s", priv_OP_names[*code]);
+    break;
+
+    case OP_REFI:
+    flag = "/i";
+    /* Fall through */
+    case OP_REF:
+    fprintf(f, " %s \\%d", flag, GET2(code,1));
+    ccode = code + priv_OP_lengths[*code];  /* A repeat item may follow */
+    goto CLASS_REF_REPEAT;
+
+    case OP_DNREFI:
+    flag = "/i";
+    /* Fall through */
+    case OP_DNREF:
+      {
+      pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
+        IMM2_SIZE;
+      fprintf(f, " %s \\k<", flag);
+      print_puchar(f, entry);
+      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+      }
+    ccode = code + priv_OP_lengths[*code];  /* A repeat item may follow */
+    goto CLASS_REF_REPEAT;
+
+    case OP_CALLOUT:
+    fprintf(f, "    %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
+      GET(code, 2 + LINK_SIZE));
+    break;
+
+    case OP_PROP:
+    case OP_NOTPROP:
+    print_prop(f, code, "    ", "");
+    break;
+
+    /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
+    in having this code always here, and it makes it less messy without all
+    those #ifdefs. */
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    case OP_XCLASS:
+      {
+      int i;
+      unsigned int min, max;
+      BOOL printmap;
+      BOOL invertmap = FALSE;
+      pcre_uint8 *map;
+      pcre_uint8 inverted_map[32];
+
+      fprintf(f, "    [");
+
+      if (*code == OP_XCLASS)
+        {
+        extra = GET(code, 1);
+        ccode = code + LINK_SIZE + 1;
+        printmap = (*ccode & XCL_MAP) != 0;
+        if ((*ccode & XCL_NOT) != 0)
+          {
+          invertmap = (*ccode & XCL_HASPROP) == 0;
+          fprintf(f, "^");
+          }
+        ccode++;
+        }
+      else
+        {
+        printmap = TRUE;
+        ccode = code + 1;
+        }
+
+      /* Print a bit map */
+
+      if (printmap)
+        {
+        map = (pcre_uint8 *)ccode;
+        if (invertmap)
+          {
+          for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
+          map = inverted_map;
+          }
+
+        for (i = 0; i < 256; i++)
+          {
+          if ((map[i/8] & (1 << (i&7))) != 0)
+            {
+            int j;
+            for (j = i+1; j < 256; j++)
+              if ((map[j/8] & (1 << (j&7))) == 0) break;
+            if (i == '-' || i == ']') fprintf(f, "\\");
+            if (PRINTABLE(i)) fprintf(f, "%c", i);
+              else fprintf(f, "\\x%02x", i);
+            if (--j > i)
+              {
+              if (j != i + 1) fprintf(f, "-");
+              if (j == '-' || j == ']') fprintf(f, "\\");
+              if (PRINTABLE(j)) fprintf(f, "%c", j);
+                else fprintf(f, "\\x%02x", j);
+              }
+            i = j;  /* Resume scanning after the run that was just printed */
+            }
+          }
+        ccode += 32 / sizeof(pcre_uchar);  /* Step over the 32-byte bit map */
+        }
+
+      /* For an XCLASS there is always some additional data */
+
+      if (*code == OP_XCLASS)
+        {
+        pcre_uchar ch;
+        while ((ch = *ccode++) != XCL_END)
+          {
+          BOOL not = FALSE;
+          const char *notch = "";
+
+          switch(ch)
+            {
+            case XCL_NOTPROP:
+            not = TRUE;
+            notch = "^";
+            /* Fall through */
+
+            case XCL_PROP:
+              {
+              unsigned int ptype = *ccode++;
+              unsigned int pvalue = *ccode++;
+
+              switch(ptype)
+                {
+                case PT_PXGRAPH:
+                fprintf(f, "[:%sgraph:]", notch);
+                break;
+
+                case PT_PXPRINT:
+                fprintf(f, "[:%sprint:]", notch);
+                break;
+
+                case PT_PXPUNCT:
+                fprintf(f, "[:%spunct:]", notch);
+                break;
+
+                default:
+                fprintf(f, "\\%c{%s}", (not? 'P':'p'),
+                  get_ucpname(ptype, pvalue));
+                break;
+                }
+              }
+            break;
+
+            default:
+            ccode += 1 + print_char(f, ccode, utf);
+            if (ch == XCL_RANGE)
+              {
+              fprintf(f, "-");
+              ccode += 1 + print_char(f, ccode, utf);
+              }
+            break;
+            }
+          }
+        }
+
+      /* Indicate a non-UTF class which was created by negation */
+
+      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
+
+      /* Handle repeats after a class or a back reference */
+
+      CLASS_REF_REPEAT:
+      switch(*ccode)
+        {
+        case OP_CRSTAR:
+        case OP_CRMINSTAR:
+        case OP_CRPLUS:
+        case OP_CRMINPLUS:
+        case OP_CRQUERY:
+        case OP_CRMINQUERY:
+        case OP_CRPOSSTAR:
+        case OP_CRPOSPLUS:
+        case OP_CRPOSQUERY:
+        fprintf(f, "%s", priv_OP_names[*ccode]);
+        extra += priv_OP_lengths[*ccode];  /* The repeat shares this line */
+        break;
+
+        case OP_CRRANGE:
+        case OP_CRMINRANGE:
+        case OP_CRPOSRANGE:
+        min = GET2(ccode,1);
+        max = GET2(ccode,1 + IMM2_SIZE);
+        if (max == 0) fprintf(f, "{%u,}", min);
+        else fprintf(f, "{%u,%u}", min, max);
+        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
+        else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
+        extra += priv_OP_lengths[*ccode];
+        break;
+
+        /* Do nothing if it's not a repeat; this code stops picky compilers
+        warning about the lack of a default code path. */
+
+        default:
+        break;
+        }
+      }
+    break;
+
+    case OP_MARK:
+    case OP_PRUNE_ARG:
+    case OP_SKIP_ARG:
+    case OP_THEN_ARG:
+    fprintf(f, "    %s ", priv_OP_names[*code]);
+    print_puchar(f, code + 2);
+    extra += code[1];  /* Skip the name; code[1] is presumably its length */
+    break;
+
+    case OP_THEN:
+    fprintf(f, "    %s", priv_OP_names[*code]);
+    break;
+
+    case OP_CIRCM:
+    case OP_DOLLM:
+    flag = "/m";
+    /* Fall through */
+
+    /* Anything else is just an item with no data, but possibly a flag. */
+
+    default:
+    fprintf(f, " %s %s", flag, priv_OP_names[*code]);
+    break;
+    }
+
+  code += priv_OP_lengths[*code] + extra;  /* Advance to the next item */
+  fprintf(f, "\n");
+  }
+}
+
+/* End of pcre_printint.c */
diff --git a/ext/pcre/pcrelib/pcre_printint.src b/ext/pcre/pcrelib/pcre_printint.src
deleted file mode 100644 (file)
index c7d8629..0000000
+++ /dev/null
@@ -1,572 +0,0 @@
-/*************************************************
-*      Perl-Compatible Regular Expressions       *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
-                       Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-
-    * Neither the name of the University of Cambridge nor the names of its
-      contributors may be used to endorse or promote products derived from
-      this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains a PCRE private debugging function for printing out the
-internal form of a compiled regular expression, along with some supporting
-local functions. This source file is used in two places:
-
-(1) It is #included by pcre_compile.c when it is compiled in debugging mode
-(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
-compiles.
-
-(2) It is always #included by pcretest.c, which can be asked to print out a
-compiled regex for debugging purposes. */
-
-
-/* Macro that decides whether a character should be output as a literal or in
-hexadecimal. We don't use isprint() because that can vary from system to system
-(even without the use of locales) and we want the output always to be the same,
-for testing purposes. This macro is used in pcretest as well as in this file. */
-
-#ifdef EBCDIC
-#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
-#else
-#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
-#endif
-
-/* The table of operator names. */
-
-static const char *OP_names[] = { OP_NAME_LIST };
-
-
-
-/*************************************************
-*       Print single- or multi-byte character    *
-*************************************************/
-
-static int
-print_char(FILE *f, uschar *ptr, BOOL utf8)
-{
-int c = *ptr;
-
-#ifndef SUPPORT_UTF8
-utf8 = utf8;  /* Avoid compiler warning */
-if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
-return 0;
-
-#else
-if (!utf8 || (c & 0xc0) != 0xc0)
-  {
-  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
-  return 0;
-  }
-else
-  {
-  int i;
-  int a = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
-  int s = 6*a;
-  c = (c & _pcre_utf8_table3[a]) << s;
-  for (i = 1; i <= a; i++)
-    {
-    /* This is a check for malformed UTF-8; it should only occur if the sanity
-    check has been turned off. Rather than swallow random bytes, just stop if
-    we hit a bad one. Print it with \X instead of \x as an indication. */
-
-    if ((ptr[i] & 0xc0) != 0x80)
-      {
-      fprintf(f, "\\X{%x}", c);
-      return i - 1;
-      }
-
-    /* The byte is OK */
-
-    s -= 6;
-    c |= (ptr[i] & 0x3f) << s;
-    }
-  if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
-  return a;
-  }
-#endif
-}
-
-
-
-/*************************************************
-*          Find Unicode property name            *
-*************************************************/
-
-static const char *
-get_ucpname(int ptype, int pvalue)
-{
-#ifdef SUPPORT_UCP
-int i;
-for (i = _pcre_utt_size - 1; i >= 0; i--)
-  {
-  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
-  }
-return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
-#else
-/* It gets harder and harder to shut off unwanted compiler warnings. */
-ptype = ptype * pvalue;
-return (ptype == pvalue)? "??" : "??";
-#endif
-}
-
-
-
-/*************************************************
-*         Print compiled regex                   *
-*************************************************/
-
-/* Make this function work for a regex with integers either byte order.
-However, we assume that what we are passed is a compiled regex. The
-print_lengths flag controls whether offsets and lengths of items are printed.
-They can be turned off from pcretest so that automatic tests on bytecode can be
-written that do not depend on the value of LINK_SIZE. */
-
-static void
-pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
-{
-real_pcre *re = (real_pcre *)external_re;
-uschar *codestart, *code;
-BOOL utf8;
-
-unsigned int options = re->options;
-int offset = re->name_table_offset;
-int count = re->name_count;
-int size = re->name_entry_size;
-
-if (re->magic_number != MAGIC_NUMBER)
-  {
-  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
-  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
-  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
-  options = ((options << 24) & 0xff000000) |
-            ((options <<  8) & 0x00ff0000) |
-            ((options >>  8) & 0x0000ff00) |
-            ((options >> 24) & 0x000000ff);
-  }
-
-code = codestart = (uschar *)re + offset + count * size;
-utf8 = (options & PCRE_UTF8) != 0;
-
-for(;;)
-  {
-  uschar *ccode;
-  int c;
-  int extra = 0;
-
-  if (print_lengths)
-    fprintf(f, "%3d ", (int)(code - codestart));
-  else
-    fprintf(f, "    ");
-
-  switch(*code)
-    {
-/* ========================================================================== */
-      /* These cases are never obeyed. This is a fudge that causes a compile-
-      time error if the vectors OP_names or _pcre_OP_lengths, which are indexed
-      by opcode, are not the correct length. It seems to be the only way to do
-      such a check at compile time, as the sizeof() operator does not work in
-      the C preprocessor. We do this while compiling pcretest, because that
-      #includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this
-      when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then
-      know the size of _pcre_OP_lengths. */
-
-#ifdef COMPILING_PCRETEST
-      case OP_TABLE_LENGTH:
-      case OP_TABLE_LENGTH +
-        ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
-        (sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)):
-      break;
-#endif
-/* ========================================================================== */
-
-    case OP_END:
-    fprintf(f, "    %s\n", OP_names[*code]);
-    fprintf(f, "------------------------------------------------------------------\n");
-    return;
-
-    case OP_OPT:
-    fprintf(f, " %.2x %s", code[1], OP_names[*code]);
-    break;
-
-    case OP_CHAR:
-    fprintf(f, "    ");
-    do
-      {
-      code++;
-      code += 1 + print_char(f, code, utf8);
-      }
-    while (*code == OP_CHAR);
-    fprintf(f, "\n");
-    continue;
-
-    case OP_CHARNC:
-    fprintf(f, " NC ");
-    do
-      {
-      code++;
-      code += 1 + print_char(f, code, utf8);
-      }
-    while (*code == OP_CHARNC);
-    fprintf(f, "\n");
-    continue;
-
-    case OP_CBRA:
-    case OP_SCBRA:
-    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
-      else fprintf(f, "    ");
-    fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
-    break;
-
-    case OP_BRA:
-    case OP_SBRA:
-    case OP_KETRMAX:
-    case OP_KETRMIN:
-    case OP_ALT:
-    case OP_KET:
-    case OP_ASSERT:
-    case OP_ASSERT_NOT:
-    case OP_ASSERTBACK:
-    case OP_ASSERTBACK_NOT:
-    case OP_ONCE:
-    case OP_COND:
-    case OP_SCOND:
-    case OP_REVERSE:
-    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
-      else fprintf(f, "    ");
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_CLOSE:
-    fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
-    break;
-
-    case OP_CREF:
-    case OP_NCREF:
-    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
-    break;
-
-    case OP_RREF:
-    c = GET2(code, 1);
-    if (c == RREF_ANY)
-      fprintf(f, "    Cond recurse any");
-    else
-      fprintf(f, "    Cond recurse %d", c);
-    break;
-
-    case OP_NRREF:
-    c = GET2(code, 1);
-    if (c == RREF_ANY)
-      fprintf(f, "    Cond nrecurse any");
-    else
-      fprintf(f, "    Cond nrecurse %d", c);
-    break;
-
-    case OP_DEF:
-    fprintf(f, "    Cond def");
-    break;
-
-    case OP_STAR:
-    case OP_MINSTAR:
-    case OP_POSSTAR:
-    case OP_PLUS:
-    case OP_MINPLUS:
-    case OP_POSPLUS:
-    case OP_QUERY:
-    case OP_MINQUERY:
-    case OP_POSQUERY:
-    case OP_TYPESTAR:
-    case OP_TYPEMINSTAR:
-    case OP_TYPEPOSSTAR:
-    case OP_TYPEPLUS:
-    case OP_TYPEMINPLUS:
-    case OP_TYPEPOSPLUS:
-    case OP_TYPEQUERY:
-    case OP_TYPEMINQUERY:
-    case OP_TYPEPOSQUERY:
-    fprintf(f, "    ");
-    if (*code >= OP_TYPESTAR)
-      {
-      fprintf(f, "%s", OP_names[code[1]]);
-      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
-        {
-        fprintf(f, " %s ", get_ucpname(code[2], code[3]));
-        extra = 2;
-        }
-      }
-    else extra = print_char(f, code+1, utf8);
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_EXACT:
-    case OP_UPTO:
-    case OP_MINUPTO:
-    case OP_POSUPTO:
-    fprintf(f, "    ");
-    extra = print_char(f, code+3, utf8);
-    fprintf(f, "{");
-    if (*code != OP_EXACT) fprintf(f, "0,");
-    fprintf(f, "%d}", GET2(code,1));
-    if (*code == OP_MINUPTO) fprintf(f, "?");
-      else if (*code == OP_POSUPTO) fprintf(f, "+");
-    break;
-
-    case OP_TYPEEXACT:
-    case OP_TYPEUPTO:
-    case OP_TYPEMINUPTO:
-    case OP_TYPEPOSUPTO:
-    fprintf(f, "    %s", OP_names[code[3]]);
-    if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
-      {
-      fprintf(f, " %s ", get_ucpname(code[4], code[5]));
-      extra = 2;
-      }
-    fprintf(f, "{");
-    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
-    fprintf(f, "%d}", GET2(code,1));
-    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
-      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
-    break;
-
-    case OP_NOT:
-    c = code[1];
-    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
-      else fprintf(f, "    [^\\x%02x]", c);
-    break;
-
-    case OP_NOTSTAR:
-    case OP_NOTMINSTAR:
-    case OP_NOTPOSSTAR:
-    case OP_NOTPLUS:
-    case OP_NOTMINPLUS:
-    case OP_NOTPOSPLUS:
-    case OP_NOTQUERY:
-    case OP_NOTMINQUERY:
-    case OP_NOTPOSQUERY:
-    c = code[1];
-    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
-      else fprintf(f, "    [^\\x%02x]", c);
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_NOTEXACT:
-    case OP_NOTUPTO:
-    case OP_NOTMINUPTO:
-    case OP_NOTPOSUPTO:
-    c = code[3];
-    if (PRINTABLE(c)) fprintf(f, "    [^%c]{", c);
-      else fprintf(f, "    [^\\x%02x]{", c);
-    if (*code != OP_NOTEXACT) fprintf(f, "0,");
-    fprintf(f, "%d}", GET2(code,1));
-    if (*code == OP_NOTMINUPTO) fprintf(f, "?");
-      else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
-    break;
-
-    case OP_RECURSE:
-    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
-      else fprintf(f, "    ");
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_REF:
-    fprintf(f, "    \\%d", GET2(code,1));
-    ccode = code + _pcre_OP_lengths[*code];
-    goto CLASS_REF_REPEAT;
-
-    case OP_CALLOUT:
-    fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
-      GET(code, 2 + LINK_SIZE));
-    break;
-
-    case OP_PROP:
-    case OP_NOTPROP:
-    fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
-    break;
-
-    /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
-    having this code always here, and it makes it less messy without all those
-    #ifdefs. */
-
-    case OP_CLASS:
-    case OP_NCLASS:
-    case OP_XCLASS:
-      {
-      int i, min, max;
-      BOOL printmap;
-
-      fprintf(f, "    [");
-
-      if (*code == OP_XCLASS)
-        {
-        extra = GET(code, 1);
-        ccode = code + LINK_SIZE + 1;
-        printmap = (*ccode & XCL_MAP) != 0;
-        if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
-        }
-      else
-        {
-        printmap = TRUE;
-        ccode = code + 1;
-        }
-
-      /* Print a bit map */
-
-      if (printmap)
-        {
-        for (i = 0; i < 256; i++)
-          {
-          if ((ccode[i/8] & (1 << (i&7))) != 0)
-            {
-            int j;
-            for (j = i+1; j < 256; j++)
-              if ((ccode[j/8] & (1 << (j&7))) == 0) break;
-            if (i == '-' || i == ']') fprintf(f, "\\");
-            if (PRINTABLE(i)) fprintf(f, "%c", i);
-              else fprintf(f, "\\x%02x", i);
-            if (--j > i)
-              {
-              if (j != i + 1) fprintf(f, "-");
-              if (j == '-' || j == ']') fprintf(f, "\\");
-              if (PRINTABLE(j)) fprintf(f, "%c", j);
-                else fprintf(f, "\\x%02x", j);
-              }
-            i = j;
-            }
-          }
-        ccode += 32;
-        }
-
-      /* For an XCLASS there is always some additional data */
-
-      if (*code == OP_XCLASS)
-        {
-        int ch;
-        while ((ch = *ccode++) != XCL_END)
-          {
-          if (ch == XCL_PROP)
-            {
-            int ptype = *ccode++;
-            int pvalue = *ccode++;
-            fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
-            }
-          else if (ch == XCL_NOTPROP)
-            {
-            int ptype = *ccode++;
-            int pvalue = *ccode++;
-            fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
-            }
-          else
-            {
-            ccode += 1 + print_char(f, ccode, TRUE);
-            if (ch == XCL_RANGE)
-              {
-              fprintf(f, "-");
-              ccode += 1 + print_char(f, ccode, TRUE);
-              }
-            }
-          }
-        }
-
-      /* Indicate a non-UTF8 class which was created by negation */
-
-      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
-
-      /* Handle repeats after a class or a back reference */
-
-      CLASS_REF_REPEAT:
-      switch(*ccode)
-        {
-        case OP_CRSTAR:
-        case OP_CRMINSTAR:
-        case OP_CRPLUS:
-        case OP_CRMINPLUS:
-        case OP_CRQUERY:
-        case OP_CRMINQUERY:
-        fprintf(f, "%s", OP_names[*ccode]);
-        extra += _pcre_OP_lengths[*ccode];
-        break;
-
-        case OP_CRRANGE:
-        case OP_CRMINRANGE:
-        min = GET2(ccode,1);
-        max = GET2(ccode,3);
-        if (max == 0) fprintf(f, "{%d,}", min);
-        else fprintf(f, "{%d,%d}", min, max);
-        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
-        extra += _pcre_OP_lengths[*ccode];
-        break;
-
-        /* Do nothing if it's not a repeat; this code stops picky compilers
-        warning about the lack of a default code path. */
-
-        default:
-        break;
-        }
-      }
-    break;
-
-    case OP_MARK:
-    case OP_PRUNE_ARG:
-    case OP_SKIP_ARG:
-    fprintf(f, "    %s %s", OP_names[*code], code + 2);
-    extra += code[1];
-    break;
-
-    case OP_THEN:
-    if (print_lengths)
-      fprintf(f, "    %s %d", OP_names[*code], GET(code, 1));
-    else
-      fprintf(f, "    %s", OP_names[*code]);
-    break;
-
-    case OP_THEN_ARG:
-    if (print_lengths)
-      fprintf(f, "    %s %d %s", OP_names[*code], GET(code, 1),
-        code + 2 + LINK_SIZE);
-    else
-      fprintf(f, "    %s %s", OP_names[*code], code + 2 + LINK_SIZE);
-    extra += code[1+LINK_SIZE];
-    break;
-
-    /* Anything else is just an item with no data*/
-
-    default:
-    fprintf(f, "    %s", OP_names[*code]);
-    break;
-    }
-
-  code += _pcre_OP_lengths[*code] + extra;
-  fprintf(f, "\n");
-  }
-}
-
-/* End of pcre_printint.src */
index 79efa90f216c5d86475f68687bb0141e75893a97..d5e0b61c417c6286b317af8c222d6817251b806b 100644 (file)
@@ -44,9 +44,7 @@ pattern data block. This might be helpful in applications where the block is
 shared by different users. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index c2aff517a5d2da67336207e042294126dac1b49e..2fe43c692022f68d4a6ceed9d82fa03eb3c039c7 100644 (file)
@@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 supporting functions. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
@@ -879,9 +877,6 @@ do
       case OP_SOM:
       case OP_THEN:
       case OP_THEN_ARG:
-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
-      case OP_XCLASS:
-#endif
       return SSB_FAIL;
 
       /* We can ignore word boundary tests. */
@@ -1257,6 +1252,16 @@ do
       with a value >= 0xc4 is a potentially valid starter because it starts a
       character with a value > 255. */
 
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+      case OP_XCLASS:
+      if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0)
+        return SSB_FAIL;
+      /* No map and negated: every character < 256 matches, so all bits are set. */
+      if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0)
+        return SSB_FAIL;
+#endif
+      /* Fall through */
+
       case OP_NCLASS:
 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
       if (utf)
@@ -1273,8 +1278,21 @@ do
       case OP_CLASS:
         {
         pcre_uint8 *map;
-        tcode++;
-        map = (pcre_uint8 *)tcode;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+        map = NULL;
+        if (*tcode == OP_XCLASS)
+          {
+          if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0)
+            map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1);
+          tcode += GET(tcode, 1);
+          }
+        else
+#endif
+          {
+          tcode++;
+          map = (pcre_uint8 *)tcode;
+          tcode += 32 / sizeof(pcre_uchar);
+          }
 
         /* In UTF-8 mode, the bits in a bit map correspond to character
         values, not to byte values. However, the bit map we are constructing is
@@ -1282,31 +1300,35 @@ do
         value is > 127. In fact, there are only two possible starting bytes for
         characters in the range 128 - 255. */
 
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
-        if (utf)
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+        if (map != NULL)
+#endif
           {
-          for (c = 0; c < 16; c++) start_bits[c] |= map[c];
-          for (c = 128; c < 256; c++)
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+          if (utf)
             {
-            if ((map[c/8] && (1 << (c&7))) != 0)
+            for (c = 0; c < 16; c++) start_bits[c] |= map[c];
+            for (c = 128; c < 256; c++)
               {
-              int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
-              start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
-              c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
+              if ((map[c/8] & (1 << (c&7))) != 0)   /* Is bit c set in the map? */
+                {
+                int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
+                start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
+                c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
+                }
               }
             }
-          }
-        else
+          else
 #endif
-          {
-          /* In non-UTF-8 mode, the two bit maps are completely compatible. */
-          for (c = 0; c < 32; c++) start_bits[c] |= map[c];
+            {
+            /* In non-UTF-8 mode, the two bit maps are completely compatible. */
+            for (c = 0; c < 32; c++) start_bits[c] |= map[c];
+            }
           }
 
         /* Advance past the bit map, and act on what follows. For a zero
         minimum repeat, continue; otherwise stop processing. */
 
-        tcode += 32 / sizeof(pcre_uchar);
         switch (*tcode)
           {
           case OP_CRSTAR:
index f38ab52cbb8f840a3a038a078040bba84d7560f8..0c2dcb6afbfb027dd1bd0a879ba75a6f1fbc9e71 100644 (file)
@@ -45,9 +45,7 @@ uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
 clashes with the library. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index 46ea70c44cc5bce78aed1cf497c8281cc1533cb0..9f9e944bbf5bfd812ef1ea847a8c7295786f777a 100644 (file)
@@ -10,9 +10,7 @@ needed. */
 
 #ifndef PCRE_INCLUDED
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index 3b0f6464a359a64db422ef7bff5ed74051bcbda5..3a9fba785c7ed232827a967478539ea5009a0040 100644 (file)
@@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 strings. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index ae86ff28bc8f5859540a212c695a244c5f0e7f65..00b8dd680c2f7b452279f3debb540aa75d9da03d 100644 (file)
@@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 string that identifies the PCRE version that is in use. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
index ad153be7851512e58cdfbddf9c63cba987988cb6..6f800c20a9df2d246b0a6cb8cb86e95eba667278 100644 (file)
@@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 class. It is used by both pcre_exec() and pcre_def_exec(). */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #include "pcre_internal.h"
 
@@ -81,6 +79,11 @@ additional data. */
 
 if (c < 256)
   {
+  if ((*data & XCL_HASPROP) == 0)
+    {
+    if ((*data & XCL_MAP) == 0) return negated;
+    return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
+    }
   if ((*data & XCL_MAP) != 0 &&
     (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
     return !negated; /* char found */
index 7cf4a4a657b9b3de9ffec153d54e766336a57773..3800239bdbe711d62c8e379a55ee9afdd88d3b56 100644 (file)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 functions. */
 
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 
 /* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
@@ -170,7 +168,9 @@ static const int eint[] = {
   REG_BADPAT,  /* missing opening brace after \o */
   REG_BADPAT,  /* parentheses too deeply nested */
   REG_BADPAT,  /* invalid range in character class */
-  REG_BADPAT   /* group name must start with a non-digit */
+  REG_BADPAT,  /* group name must start with a non-digit */
+  /* 85 */
+  REG_BADPAT   /* parentheses too deeply nested (stack check) */
 };
 
 /* Table of texts corresponding to POSIX error codes */
diff --git a/ext/pcre/pcrelib/sljit/sljitConfig.h b/ext/pcre/pcrelib/sljit/sljitConfig.h
new file mode 100644 (file)
index 0000000..4d93f4f
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_CONFIG_H_
+#define _SLJIT_CONFIG_H_
+
+/* --------------------------------------------------------------------- */
+/*  Custom defines                                                       */
+/* --------------------------------------------------------------------- */
+
+/* Put your custom defines here. This empty section will never change,
+   which helps with maintaining patches (with diff / patch utilities). */
+
+/* --------------------------------------------------------------------- */
+/*  Architecture                                                         */
+/* --------------------------------------------------------------------- */
+
+/* Architecture selection. */
+/* #define SLJIT_CONFIG_X86_32 1 */
+/* #define SLJIT_CONFIG_X86_64 1 */
+/* #define SLJIT_CONFIG_ARM_V5 1 */
+/* #define SLJIT_CONFIG_ARM_V7 1 */
+/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
+/* #define SLJIT_CONFIG_ARM_64 1 */
+/* #define SLJIT_CONFIG_PPC_32 1 */
+/* #define SLJIT_CONFIG_PPC_64 1 */
+/* #define SLJIT_CONFIG_MIPS_32 1 */
+/* #define SLJIT_CONFIG_MIPS_64 1 */
+/* #define SLJIT_CONFIG_SPARC_32 1 */
+/* #define SLJIT_CONFIG_TILEGX 1 */
+
+/* #define SLJIT_CONFIG_AUTO 1 */
+/* #define SLJIT_CONFIG_UNSUPPORTED 1 */
+
+/* --------------------------------------------------------------------- */
+/*  Utilities                                                            */
+/* --------------------------------------------------------------------- */
+
+/* Useful for thread-safe compiling of global functions. */
+#ifndef SLJIT_UTIL_GLOBAL_LOCK
+/* Enabled by default */
+#define SLJIT_UTIL_GLOBAL_LOCK 1
+#endif
+
+/* Implements a stack like data structure (by using mmap / VirtualAlloc). */
+#ifndef SLJIT_UTIL_STACK
+/* Enabled by default */
+#define SLJIT_UTIL_STACK 1
+#endif
+
+/* Single threaded application. Does not require any locks. */
+#ifndef SLJIT_SINGLE_THREADED
+/* Disabled by default. */
+#define SLJIT_SINGLE_THREADED 0
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Configuration                                                        */
+/* --------------------------------------------------------------------- */
+
+/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should
+   define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, and NULL. */
+#ifndef SLJIT_STD_MACROS_DEFINED
+/* Disabled by default. */
+#define SLJIT_STD_MACROS_DEFINED 0
+#endif
+
+/* Executable code allocation:
+   If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
+   define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */
+#ifndef SLJIT_EXECUTABLE_ALLOCATOR
+/* Enabled by default. */
+#define SLJIT_EXECUTABLE_ALLOCATOR 1
+#endif
+
+/* Debug checks (assertions, etc.). */
+#ifndef SLJIT_DEBUG
+/* Enabled by default */
+#define SLJIT_DEBUG 1
+#endif
+
+/* Verbose operations */
+#ifndef SLJIT_VERBOSE
+/* Enabled by default */
+#define SLJIT_VERBOSE 1
+#endif
+
+/*
+  SLJIT_IS_FPU_AVAILABLE
+    The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE.
+      zero value - FPU is NOT present.
+      nonzero value - FPU is present.
+*/
+
+/* For further configurations, see the beginning of sljitConfigInternal.h */
+
+#endif
diff --git a/ext/pcre/pcrelib/sljit/sljitConfigInternal.h b/ext/pcre/pcrelib/sljit/sljitConfigInternal.h
new file mode 100644 (file)
index 0000000..89be38b
--- /dev/null
@@ -0,0 +1,523 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_CONFIG_INTERNAL_H_
+#define _SLJIT_CONFIG_INTERNAL_H_
+
+/*
+   SLJIT defines the following macros depending on the target architecture:
+
+   Feature detection (boolean) macros:
+   SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
+   SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
+   SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
+   SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double array by index
+   SLJIT_LITTLE_ENDIAN : little endian architecture
+   SLJIT_BIG_ENDIAN : big endian architecture
+   SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
+   SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
+   SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
+
+   Types and useful macros:
+   sljit_sb, sljit_ub : signed and unsigned 8 bit byte
+   sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type
+   sljit_si, sljit_ui : signed and unsigned 32 bit integer type
+   sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer
+   sljit_p : unsigned pointer value (usually the same as sljit_uw, but
+             some 64 bit ABIs may use 32 bit pointers)
+   sljit_s : single precision floating point value
+   sljit_d : double precision floating point value
+   SLJIT_CALL : C calling convention define for both calling JIT from C and C callbacks for JIT
+   SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
+*/
+
+#if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+       || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       || (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
+       || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+       || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+       || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+       || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+       || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+       || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+       || (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
+       || (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
+       || (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED))
+#error "An architecture must be selected"
+#endif
+
+/* Sanity check. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+       + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
+       + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+       + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+       + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+       + (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
+       + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+       + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+       + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+       + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
+       + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
+#error "Multiple architectures are selected"
+#endif
+
+/* Auto select option (requires compiler support) */
+#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
+
+#ifndef _WIN32
+
+#if defined(__i386__) || defined(__i386)
+#define SLJIT_CONFIG_X86_32 1
+#elif defined(__x86_64__)
+#define SLJIT_CONFIG_X86_64 1
+#elif defined(__arm__) || defined(__ARM__)
+#ifdef __thumb2__
+#define SLJIT_CONFIG_ARM_THUMB2 1
+#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__)
+#define SLJIT_CONFIG_ARM_V7 1
+#else
+#define SLJIT_CONFIG_ARM_V5 1
+#endif
+#elif defined (__aarch64__)
+#define SLJIT_CONFIG_ARM_64 1
+#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__))
+#define SLJIT_CONFIG_PPC_64 1
+#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
+#define SLJIT_CONFIG_PPC_32 1
+#elif defined(__mips__) && !defined(_LP64)
+#define SLJIT_CONFIG_MIPS_32 1
+#elif defined(__mips64)
+#define SLJIT_CONFIG_MIPS_64 1
+#elif defined(__sparc__) || defined(__sparc)
+#define SLJIT_CONFIG_SPARC_32 1
+#elif defined(__tilegx__)
+#define SLJIT_CONFIG_TILEGX 1
+#else
+/* Unsupported architecture */
+#define SLJIT_CONFIG_UNSUPPORTED 1
+#endif
+
+#else /* !_WIN32 */
+
+#if defined(_M_X64) || defined(__x86_64__)
+#define SLJIT_CONFIG_X86_64 1
+#elif defined(_ARM_)
+#define SLJIT_CONFIG_ARM_V5 1
+#else
+#define SLJIT_CONFIG_X86_32 1
+#endif
+
+#endif /* !WIN32 */
+#endif /* SLJIT_CONFIG_AUTO */
+
+#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+#undef SLJIT_EXECUTABLE_ALLOCATOR
+#endif
+
+#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
+
+/* These libraries are needed for the macros below. */
+#include <stdlib.h>
+#include <string.h>
+
+#endif /* STD_MACROS_DEFINED */
+
+/* General macros:
+   Note: SLJIT is designed to be as independent from them as possible.
+
+   In release mode (SLJIT_DEBUG is not defined) only the following macros are needed:
+*/
+
+#ifndef SLJIT_MALLOC
+#define SLJIT_MALLOC(size) malloc(size)
+#endif
+
+#ifndef SLJIT_FREE
+#define SLJIT_FREE(ptr) free(ptr)
+#endif
+
+#ifndef SLJIT_MEMMOVE
+#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
+#endif
+
+#ifndef SLJIT_ZEROMEM
+#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
+#endif
+
+#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY)
+
+#if defined(__GNUC__) && (__GNUC__ >= 3)
+#define SLJIT_LIKELY(x)                __builtin_expect((x), 1)
+#define SLJIT_UNLIKELY(x)      __builtin_expect((x), 0)
+#else
+#define SLJIT_LIKELY(x)                (x)
+#define SLJIT_UNLIKELY(x)      (x)
+#endif
+
+#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */
+
+#ifndef SLJIT_INLINE
+/* Inline functions. Some old compilers do not support them. */
+#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510
+#define SLJIT_INLINE
+#else
+#define SLJIT_INLINE __inline
+#endif
+#endif /* !SLJIT_INLINE */
+
+#ifndef SLJIT_CONST
+/* Const variables. */
+#define SLJIT_CONST const
+#endif
+
+#ifndef SLJIT_UNUSED_ARG
+/* Unused arguments. */
+#define SLJIT_UNUSED_ARG(arg) (void)arg
+#endif
+
+#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC)
+/* Static ABI functions. For all-in-one programs. */
+
+#if defined(__GNUC__)
+/* Disable unused warnings in gcc. */
+#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused))
+#else
+#define SLJIT_API_FUNC_ATTRIBUTE static
+#endif
+
+#else
+#define SLJIT_API_FUNC_ATTRIBUTE
+#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */
+
+#ifndef SLJIT_CACHE_FLUSH
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+/* Not required to implement on archs with unified caches. */
+#define SLJIT_CACHE_FLUSH(from, to)
+
+#elif defined __APPLE__
+
+/* Supported by all macs since Mac OS 10.5.
+   However, it does not work on non-jailbroken iOS devices,
+   although the compilation is successful. */
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+       sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
+
+#elif defined __ANDROID__
+
+/* Android lacks __clear_cache; instead, cacheflush should be used. */
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+    cacheflush((long)(from), (long)(to), 0)
+
+#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+
+/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
+#define SLJIT_CACHE_FLUSH(from, to) \
+       ppc_cache_flush((from), (to))
+
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+
+/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
+#define SLJIT_CACHE_FLUSH(from, to) \
+       sparc_cache_flush((from), (to))
+
+#else
+
+/* Calls __ARM_NR_cacheflush on ARM-Linux. */
+#define SLJIT_CACHE_FLUSH(from, to) \
+       __clear_cache((char*)(from), (char*)(to))
+
+#endif
+
+#endif /* !SLJIT_CACHE_FLUSH */
+
+/* 8 bit byte type. */
+typedef unsigned char sljit_ub;
+typedef signed char sljit_sb;
+
+/* 16 bit half-word type. */
+typedef unsigned short int sljit_uh;
+typedef signed short int sljit_sh;
+
+/* 32 bit integer type. */
+typedef unsigned int sljit_ui;
+typedef signed int sljit_si;
+
+/* Machine word type. Can encapsulate a pointer.
+     32 bit for 32 bit machines.
+     64 bit for 64 bit machines. */
+#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+/* Just to have something. */
+#define SLJIT_WORD_SHIFT 0
+typedef unsigned long int sljit_uw;
+typedef long int sljit_sw;
+#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+       && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+       && !(defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+#define SLJIT_32BIT_ARCHITECTURE 1
+#define SLJIT_WORD_SHIFT 2
+typedef unsigned int sljit_uw;
+typedef int sljit_sw;
+#else
+#define SLJIT_64BIT_ARCHITECTURE 1
+#define SLJIT_WORD_SHIFT 3
+#ifdef _WIN32
+typedef unsigned __int64 sljit_uw;
+typedef __int64 sljit_sw;
+#else
+typedef unsigned long int sljit_uw;
+typedef long int sljit_sw;
+#endif
+#endif
+
+typedef sljit_uw sljit_p;
+
+/* Floating point types. */
+typedef float sljit_s;
+typedef double sljit_d;
+
+/* Shift for pointer sized data. */
+#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT
+
+/* Shift for double precision sized data. */
+#define SLJIT_DOUBLE_SHIFT 3
+
+#ifndef SLJIT_W
+
+/* Defining long constants. */
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#define SLJIT_W(w)     (w##ll)
+#else
+#define SLJIT_W(w)     (w)
+#endif
+
+#endif /* !SLJIT_W */
+
+#ifndef SLJIT_CALL
+
+/* ABI (Application Binary Interface) types. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
+#if defined(__GNUC__) && !defined(__APPLE__)
+
+#define SLJIT_CALL __attribute__ ((fastcall))
+#define SLJIT_X86_32_FASTCALL 1
+
+#elif defined(_MSC_VER)
+
+#define SLJIT_CALL __fastcall
+#define SLJIT_X86_32_FASTCALL 1
+
+#elif defined(__BORLANDC__)
+
+#define SLJIT_CALL __msfastcall
+#define SLJIT_X86_32_FASTCALL 1
+
+#else /* Unknown compiler. */
+
+/* The cdecl attribute is the default. */
+#define SLJIT_CALL
+
+#endif
+
+#else /* Non x86-32 architectures. */
+
+#define SLJIT_CALL
+
+#endif /* SLJIT_CONFIG_X86_32 */
+
+#endif /* !SLJIT_CALL */
+
+#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN)
+
+/* These macros are useful for the applications. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+
+#ifdef __LITTLE_ENDIAN__
+#define SLJIT_LITTLE_ENDIAN 1
+#else
+#define SLJIT_BIG_ENDIAN 1
+#endif
+
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+       || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+
+#ifdef __MIPSEL__
+#define SLJIT_LITTLE_ENDIAN 1
+#else
+#define SLJIT_BIG_ENDIAN 1
+#endif
+
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+
+#define SLJIT_BIG_ENDIAN 1
+
+#else
+#define SLJIT_LITTLE_ENDIAN 1
+#endif
+
+#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */
+
+/* Sanity check. */
+#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#error "Exactly one endianness must be selected"
+#endif
+
+#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#error "Exactly one endianness must be selected"
+#endif
+
+#ifndef SLJIT_INDIRECT_CALL
+#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \
+       || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
+/* It seems certain ppc compilers use an indirect addressing for functions
+   which makes things complicated. */
+#define SLJIT_INDIRECT_CALL 1
+#endif
+#endif /* SLJIT_INDIRECT_CALL */
+
+#ifndef SLJIT_RETURN_ADDRESS_OFFSET
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define SLJIT_RETURN_ADDRESS_OFFSET 8
+#else
+#define SLJIT_RETURN_ADDRESS_OFFSET 0
+#endif
+#endif /* SLJIT_RETURN_ADDRESS_OFFSET */
+
+#ifndef SLJIT_SSE2
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+/* Turn on SSE2 support on x86. */
+#define SLJIT_SSE2 1
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+/* Auto detect SSE2 support using CPUID.
+   On 64 bit x86 cpus, sse2 must be present. */
+#define SLJIT_DETECT_SSE2 1
+#endif
+
+#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */
+
+#endif /* !SLJIT_SSE2 */
+
+#ifndef SLJIT_UNALIGNED
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+       || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+       || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+       || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_UNALIGNED 1
+#endif
+
+#endif /* !SLJIT_UNALIGNED */
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
+#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
+#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+#include <stdio.h>
+#endif
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+
+#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP)
+
+/* SLJIT_HALT_PROCESS must halt the process. */
+#ifndef SLJIT_HALT_PROCESS
+#include <stdlib.h>
+
+#define SLJIT_HALT_PROCESS() \
+       abort();
+#endif /* !SLJIT_HALT_PROCESS */
+
+#include <stdio.h>
+
+#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */
+
+/* Feel free to redefine these two macros. */
+#ifndef SLJIT_ASSERT
+
+#define SLJIT_ASSERT(x) \
+       do { \
+               if (SLJIT_UNLIKELY(!(x))) { \
+                       printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \
+                       SLJIT_HALT_PROCESS(); \
+               } \
+       } while (0)
+
+#endif /* !SLJIT_ASSERT */
+
+#ifndef SLJIT_ASSERT_STOP
+
+#define SLJIT_ASSERT_STOP() \
+       do { \
+               printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \
+               SLJIT_HALT_PROCESS(); \
+       } while (0)
+
+#endif /* !SLJIT_ASSERT_STOP */
+
+#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
+
+/* Forcing empty, but valid statements. */
+#undef SLJIT_ASSERT
+#undef SLJIT_ASSERT_STOP
+
+#define SLJIT_ASSERT(x) \
+       do { } while (0)
+#define SLJIT_ASSERT_STOP() \
+       do { } while (0)
+
+#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
+
+#ifndef SLJIT_COMPILE_ASSERT
+
+/* Should be improved eventually. */
+#define SLJIT_COMPILE_ASSERT(x, description) \
+       SLJIT_ASSERT(x)
+
+#endif /* !SLJIT_COMPILE_ASSERT */
+
+#endif
diff --git a/ext/pcre/pcrelib/sljit/sljitExecAllocator.c b/ext/pcre/pcrelib/sljit/sljitExecAllocator.c
new file mode 100644 (file)
index 0000000..f24ed33
--- /dev/null
@@ -0,0 +1,312 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+   This file contains a simple executable memory allocator
+
+   It is assumed, that executable code blocks are usually medium (or sometimes
+   large) memory blocks, and the allocator is not too frequently called (less
+   optimized than other allocators). Thus, using it as a generic allocator is
+   not suggested.
+
+   How does it work:
+     Memory is allocated in continuous memory areas called chunks by alloc_chunk()
+     Chunk format:
+     [ block ][ block ] ... [ block ][ block terminator ]
+
+   All blocks and the block terminator start with a block_header. The block
+   header contains the size of the block itself and the size of the previous
+   block. These sizes can also contain special values.
+     Block size:
+       0 - The block is a free_block, with a different size member.
+       1 - The block is a block terminator.
+       n - The block is used at the moment, and the value contains its size.
+     Previous block size:
+       0 - This is the first block of the memory chunk.
+       n - The size of the previous block.
+
+   Using these size values we can go forward or backward on the block chain.
+   The unused blocks are stored in a linked list pointed to by free_blocks. This
+   list is useful if we need to find a suitable memory area when the allocator
+   is called.
+
+   When a block is freed, the new free block is connected to its adjacent free
+   blocks if possible. For example, if the layout is
+
+     [ free block ][ used block ][ free block ]
+   and "used block" is freed, the three blocks are connected together:
+     [           one big free block           ]
+*/
+
+/* --------------------------------------------------------------------- */
+/*  System (OS) functions                                                */
+/* --------------------------------------------------------------------- */
+
+/* 64 KByte. */
+#define CHUNK_SIZE     0x10000
+
+/*
+   alloc_chunk / free_chunk :
+     * allocate executable system memory chunks
+     * the size is always divisible by CHUNK_SIZE
+   allocator_grab_lock / allocator_release_lock :
+     * make the allocator thread safe
+     * can be empty if the OS (or the application) does not support threading
+     * only the allocator requires this lock, sljit is fully thread safe
+       as it only uses local variables
+*/
+
+#ifdef _WIN32
+
+/* Windows variant: reserves and commits `size` bytes of readable, writable
+   and executable memory; the VirtualAlloc() result is returned unchanged. */
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{
+       void* chunk = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+
+       return chunk;
+}
+
+/* Windows variant: releases a chunk through VirtualFree(). The call is made
+   with a zero size and MEM_RELEASE, so the `size` argument is not used. */
+static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+{
+       VirtualFree(chunk, 0, MEM_RELEASE);
+       SLJIT_UNUSED_ARG(size);
+}
+
+#else
+
+/* mmap() based variant: maps `size` bytes of private memory that is readable,
+   writable and executable. An anonymous mapping is used when MAP_ANON is
+   available; otherwise the dev_zero descriptor is mapped (dev_zero and
+   open_dev_zero() are defined outside this file section).
+   Returns the mapping, or NULL when it could not be created. */
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{
+       void* chunk;
+
+#ifdef MAP_ANON
+       chunk = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+#else
+       /* Open the descriptor on first use; a nonzero result means failure. */
+       if (dev_zero < 0 && open_dev_zero())
+               return NULL;
+       chunk = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
+#endif
+
+       /* Callers expect NULL, not MAP_FAILED, on error. */
+       if (chunk == MAP_FAILED)
+               return NULL;
+       return chunk;
+}
+/* mmap() based variant: unmaps `size` bytes starting at `chunk`. The result of munmap() is not checked. */
+static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+{
+       munmap(chunk, size);
+}
+
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Common functions                                                     */
+/* --------------------------------------------------------------------- */
+
+#define CHUNK_MASK     (~(CHUNK_SIZE - 1))
+/* Header found at the start of every block and of the block terminator. */
+struct block_header {
+       sljit_uw size; /* 0: free block, 1: block terminator, otherwise the size of this used block. */
+       sljit_uw prev_size; /* 0: first block of the chunk, otherwise the size of the previous block. */
+};
+/* An unused block, linked into the free_blocks list. Its header.size is 0, so the real size is kept in `size`. */
+struct free_block {
+       struct block_header header;
+       struct free_block *next; /* Next entry of the free list. */
+       struct free_block *prev; /* Previous entry of the free list; 0 for the list head. */
+       sljit_uw size; /* Size of this free block in bytes. */
+};
+
+/* Pointer helpers for walking the blocks of a chunk. `base` is a pointer into
+   the chunk and `offset` is a byte offset (callers also pass negative values).
+   Both arguments are parenthesized so that arbitrary expressions can be
+   passed without operator precedence surprises. */
+#define AS_BLOCK_HEADER(base, offset) \
+       ((struct block_header*)(((sljit_ub*)(base)) + (offset)))
+#define AS_FREE_BLOCK(base, offset) \
+       ((struct free_block*)(((sljit_ub*)(base)) + (offset)))
+/* Address of the memory area that directly follows the block header at `base`. */
+#define MEM_START(base)                ((void*)(((sljit_ub*)(base)) + sizeof(struct block_header)))
+/* `size` plus one block header, rounded up to a multiple of 8 bytes. */
+#define ALIGN_SIZE(size)       (((size) + sizeof(struct block_header) + 7) & ~7)
+/* Allocator state shared by the functions below. */
+static struct free_block* free_blocks; /* Head of the doubly linked list of free blocks. */
+static sljit_uw allocated_size; /* Sum of the sizes of the blocks that are currently in use. */
+static sljit_uw total_size; /* Sum of the chunk sizes, each reduced by one block_header. */
+
+/* Marks `free_block` as free, records its size and pushes it to the front of
+   the free_blocks list. */
+static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
+{
+       struct free_block *old_head = free_blocks;
+
+       /* A zero header size identifies a free block; the real size lives in ->size. */
+       free_block->header.size = 0;
+       free_block->size = size;
+
+       /* Link the block in as the new list head. */
+       free_block->prev = 0;
+       free_block->next = old_head;
+       if (old_head)
+               old_head->prev = free_block;
+       free_blocks = free_block;
+}
+
+/* Unlinks `free_block` from the free_blocks list. The block itself is left
+   untouched, only its neighbours (or the list head) are updated. */
+static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
+{
+       struct free_block *successor = free_block->next;
+       struct free_block *predecessor = free_block->prev;
+
+       if (successor)
+               successor->prev = predecessor;
+
+       if (!predecessor) {
+               /* Without a predecessor the block must be the current list head. */
+               SLJIT_ASSERT(free_blocks == free_block);
+               free_blocks = successor;
+               return;
+       }
+
+       predecessor->next = successor;
+}
+
+/* Allocates at least `size` bytes of executable memory.
+
+   The request is enlarged so that the block can hold a free_block once it is
+   released, then padded with a block header and rounded up by ALIGN_SIZE().
+   The free list is searched first (first fit); when no free block is large
+   enough, a new chunk whose size is a multiple of CHUNK_SIZE is obtained
+   from alloc_chunk().
+
+   Returns a pointer to the memory area that follows the block header, or
+   NULL when the request is too large to be represented or alloc_chunk()
+   fails. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
+{
+       struct block_header *header;
+       struct block_header *next_header;
+       struct free_block *free_block;
+       sljit_uw chunk_size;
+
+       /* Reject sizes for which the padding and rounding below would wrap
+          around: a wrapped value could produce a block that is smaller than
+          the caller asked for. */
+       if (size > ~(sljit_uw)0 - (sljit_uw)(2 * sizeof(struct block_header) + 7 + CHUNK_SIZE + 64))
+               return NULL;
+
+       allocator_grab_lock();
+       /* The block must be big enough to become a free_block when released. */
+       if (size < sizeof(struct free_block))
+               size = sizeof(struct free_block);
+       size = ALIGN_SIZE(size);
+
+       /* First fit search in the list of free blocks. */
+       free_block = free_blocks;
+       while (free_block) {
+               if (free_block->size >= size) {
+                       chunk_size = free_block->size;
+                       if (chunk_size > size + 64) {
+                               /* We just cut a block from the end of the free block. */
+                               chunk_size -= size;
+                               free_block->size = chunk_size;
+                               header = AS_BLOCK_HEADER(free_block, chunk_size);
+                               header->prev_size = chunk_size;
+                               AS_BLOCK_HEADER(header, size)->prev_size = size;
+                       }
+                       else {
+                               /* The remainder would be at most 64 bytes: hand out
+                                  the whole free block instead of splitting it. */
+                               sljit_remove_free_block(free_block);
+                               header = (struct block_header*)free_block;
+                               size = chunk_size;
+                       }
+                       allocated_size += size;
+                       header->size = size;
+                       allocator_release_lock();
+                       return MEM_START(header);
+               }
+               free_block = free_block->next;
+       }
+
+       /* No suitable free block: get a new chunk, rounded up to CHUNK_SIZE,
+          with room for the block terminator header. */
+       chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK;
+       header = (struct block_header*)alloc_chunk(chunk_size);
+       if (!header) {
+               allocator_release_lock();
+               return NULL;
+       }
+
+       /* The terminator header is not usable space. */
+       chunk_size -= sizeof(struct block_header);
+       total_size += chunk_size;
+
+       /* A zero prev_size marks the first block of the chunk. */
+       header->prev_size = 0;
+       if (chunk_size > size + 64) {
+               /* Cut the allocated space into a free and a used block. */
+               allocated_size += size;
+               header->size = size;
+               chunk_size -= size;
+
+               free_block = AS_FREE_BLOCK(header, size);
+               free_block->header.prev_size = size;
+               sljit_insert_free_block(free_block, chunk_size);
+               next_header = AS_BLOCK_HEADER(free_block, chunk_size);
+       }
+       else {
+               /* All space belongs to this allocation. */
+               allocated_size += chunk_size;
+               header->size = chunk_size;
+               next_header = AS_BLOCK_HEADER(header, chunk_size);
+       }
+       /* Write the block terminator (size == 1) at the end of the chunk. */
+       next_header->size = 1;
+       next_header->prev_size = chunk_size;
+       allocator_release_lock();
+       return MEM_START(header);
+}
+/* Releases the block whose memory area starts at `ptr`.
+   The block header is read from directly in front of `ptr`, so `ptr` is
+   expected to be a value returned by sljit_malloc_exec(); there is no NULL
+   check. The block is merged with a free predecessor and / or a free
+   successor, and when the result covers its whole chunk the chunk may be
+   handed back through free_chunk(). */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+{
+       struct block_header *header;
+       struct free_block* free_block;
+
+       allocator_grab_lock();
+       header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
+       allocated_size -= header->size;
+
+       /* Connecting free blocks together if possible.
+          First step: try to merge with the block in front of this one. */
+
+       /* If header->prev_size == 0, free_block will be equal to header.
+          In this case, free_block->header.size will be > 0 (the block is
+          still marked as used), so the else branch below is taken. */
+       free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
+       if (SLJIT_UNLIKELY(!free_block->header.size)) {
+               free_block->size += header->size; /* The previous block is free: grow it over this block. */
+               header = AS_BLOCK_HEADER(free_block, free_block->size);
+               header->prev_size = free_block->size;
+       }
+       else {
+               free_block = (struct free_block*)header; /* Otherwise this block becomes a new free block. */
+               sljit_insert_free_block(free_block, header->size);
+       }
+
+       header = AS_BLOCK_HEADER(free_block, free_block->size); /* Second step: look at the block that follows. */
+       if (SLJIT_UNLIKELY(!header->size)) {
+               free_block->size += ((struct free_block*)header)->size; /* It is free as well: absorb it. */
+               sljit_remove_free_block((struct free_block*)header);
+               header = AS_BLOCK_HEADER(free_block, free_block->size);
+               header->prev_size = free_block->size;
+       }
+
+       /* The whole chunk is free: free_block is the first block of the chunk
+          (prev_size == 0) and it is followed by the block terminator. */
+       if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
+               /* If this block is freed, we still have (allocated_size / 2) free space.
+                  Otherwise the chunk is kept on the free list for later allocations. */
+               if (total_size - free_block->size > (allocated_size * 3 / 2)) {
+                       total_size -= free_block->size;
+                       sljit_remove_free_block(free_block);
+                       free_chunk(free_block, free_block->size + sizeof(struct block_header));
+               }
+       }
+
+       allocator_release_lock();
+}
+
+/* Hands every completely unused chunk back through free_chunk().
+   Free blocks that share their chunk with used blocks are left on the free
+   list. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+{
+       struct free_block* free_block;
+       struct free_block* next_free_block;
+
+       allocator_grab_lock();
+
+       free_block = free_blocks;
+       while (free_block) {
+               /* Fetch the successor first: free_block may be released below. */
+               next_free_block = free_block->next;
+               /* A free block covers its whole chunk when it is the first block
+                  (prev_size == 0) and is directly followed by the terminator. */
+               if (!free_block->header.prev_size &&
+                               AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
+                       total_size -= free_block->size;
+                       sljit_remove_free_block(free_block);
+                       free_chunk(free_block, free_block->size + sizeof(struct block_header));
+               }
+               free_block = next_free_block;
+       }
+
+       /* A non-empty free list implies that at least one chunk exists. The
+          converse does not hold: a chunk can be entirely in use, which leaves
+          total_size nonzero while the free list is empty. */
+       SLJIT_ASSERT(total_size || !free_blocks);
+       allocator_release_lock();
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitLir.c b/ext/pcre/pcrelib/sljit/sljitLir.c
new file mode 100644 (file)
index 0000000..1acecba
--- /dev/null
@@ -0,0 +1,1838 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "sljitLir.h"
+
+/* Error-propagation helpers. Each macro expands to code that refers to a variable
+   named "compiler", so it can only be used where such a pointer is in scope, and
+   each may return from the enclosing function. */
+
+/* Return compiler->error from the enclosing function if an error is already recorded. */
+#define CHECK_ERROR() \
+       do { \
+               if (SLJIT_UNLIKELY(compiler->error)) \
+                       return compiler->error; \
+       } while (0)
+
+/* Same test as CHECK_ERROR(), for functions returning a pointer: returns NULL. */
+#define CHECK_ERROR_PTR() \
+       do { \
+               if (SLJIT_UNLIKELY(compiler->error)) \
+                       return NULL; \
+       } while (0)
+
+/* Same test as CHECK_ERROR(), for functions returning void. */
+#define CHECK_ERROR_VOID() \
+       do { \
+               if (SLJIT_UNLIKELY(compiler->error)) \
+                       return; \
+       } while (0)
+
+/* Return compiler->error if expr is non-zero. The macro does not store an error
+   code itself. NOTE(review): presumably expr has already set compiler->error -
+   confirm at the call sites. */
+#define FAIL_IF(expr) \
+       do { \
+               if (SLJIT_UNLIKELY(expr)) \
+                       return compiler->error; \
+       } while (0)
+
+/* Return NULL if expr is non-zero; compiler->error is left untouched. */
+#define PTR_FAIL_IF(expr) \
+       do { \
+               if (SLJIT_UNLIKELY(expr)) \
+                       return NULL; \
+       } while (0)
+
+/* If ptr is NULL, record SLJIT_ERR_ALLOC_FAILED in compiler->error and return it. */
+#define FAIL_IF_NULL(ptr) \
+       do { \
+               if (SLJIT_UNLIKELY(!(ptr))) { \
+                       compiler->error = SLJIT_ERR_ALLOC_FAILED; \
+                       return SLJIT_ERR_ALLOC_FAILED; \
+               } \
+       } while (0)
+
+/* If ptr is NULL, record SLJIT_ERR_ALLOC_FAILED in compiler->error and return NULL. */
+#define PTR_FAIL_IF_NULL(ptr) \
+       do { \
+               if (SLJIT_UNLIKELY(!(ptr))) { \
+                       compiler->error = SLJIT_ERR_ALLOC_FAILED; \
+                       return NULL; \
+               } \
+       } while (0)
+
+/* If ptr is NULL, record SLJIT_ERR_EX_ALLOC_FAILED in compiler->error and return NULL. */
+#define PTR_FAIL_WITH_EXEC_IF(ptr) \
+       do { \
+               if (SLJIT_UNLIKELY(!(ptr))) { \
+                       compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \
+                       return NULL; \
+               } \
+       } while (0)
+
+#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+
+/* Strip every modifier bit (SLJIT_INT_OP, the SLJIT_SET_* bits and SLJIT_KEEP_FLAGS)
+   from op, leaving the remaining opcode bits. */
+#define GET_OPCODE(op) \
+       ((op) & ~(SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+
+/* Keep only the SLJIT_SET_* bits of op (SLJIT_INT_OP and SLJIT_KEEP_FLAGS are excluded). */
+#define GET_FLAGS(op) \
+       ((op) & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))
+
+/* Keep every modifier bit of op: exactly the bits that GET_OPCODE() removes. */
+#define GET_ALL_FLAGS(op) \
+       ((op) & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+
+/* Non-zero when op lies in SLJIT_MOV_UB..SLJIT_MOV_SH or SLJIT_MOVU_UB..SLJIT_MOVU_SH.
+   op is compared as-is, so modifier bits are not stripped here. */
+#define TYPE_CAST_NEEDED(op) \
+       (((op) >= SLJIT_MOV_UB && (op) <= SLJIT_MOV_SH) || ((op) >= SLJIT_MOVU_UB && (op) <= SLJIT_MOVU_SH))
+
+/* Allocation size in bytes of each fragment in the compiler->buf list
+   (passed to SLJIT_MALLOC in sljit_create_compiler() and ensure_buf()). */
+#define BUF_SIZE       4096
+
+/* Allocation size in bytes of each fragment in the compiler->abuf list;
+   halved on 32-bit architectures. */
+#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
+#define ABUF_SIZE      2048
+#else
+#define ABUF_SIZE      4096
+#endif
+
+/* Parameter parsing. */
+/* Mask of the 6-bit base register field in the low bits of an operand. */
+#define REG_MASK               0x3f
+/* Extract the offset register field, stored in bits 8..13 of an operand. */
+#define OFFS_REG(reg)          (((reg) >> 8) & REG_MASK)
+/* Mask of the offset register field in its in-place position. */
+#define OFFS_REG_MASK          (REG_MASK << 8)
+/* Shift a register number into the offset register field. */
+#define TO_OFFS_REG(reg)       ((reg) << 8)
+/* Register test for operands that are known not to be unused (no lower bound check). */
+#define FAST_IS_REG(reg)       ((reg) <= REG_MASK)
+/* Register test that also rejects the value 0 (the unused operand). */
+#define SLOW_IS_REG(reg)       ((reg) > 0 && (reg) <= REG_MASK)
+
+/* Jump flags. */
+/* JUMP_LABEL and JUMP_ADDR record in jump->flags which member of jump->u is valid:
+   sljit_set_label() sets JUMP_LABEL together with u.label, sljit_set_target()
+   sets JUMP_ADDR together with u.target; each clears the other bit. */
+#define JUMP_LABEL     0x1
+#define JUMP_ADDR      0x2
+/* SLJIT_REWRITABLE_JUMP is 0x1000. */
+
+/* x86-only flag values; PATCH_MD is defined on x86-64 only.
+   NOTE(review): the MB/MW/MD suffixes presumably name the patched operand width -
+   confirm in the x86 backend. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#      define PATCH_MB 0x4
+#      define PATCH_MW 0x8
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#      define PATCH_MD 0x10
+#endif
+#endif
+
+/* ARM v5 / v7 flag values. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#      define IS_BL            0x4
+#      define PATCH_B          0x8
+#endif
+
+/* ARM v5 only: number of sljit_uw entries in compiler->cpool; sljit_create_compiler()
+   allocates one extra sljit_ub per entry behind them (compiler->cpool_unique). */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#      define CPOOL_SIZE       512
+#endif
+
+/* Thumb2 flag values. The PATCH_TYPE1..PATCH_BL values (0x10..0x60) are consecutive
+   values within one nibble rather than independent bits (e.g. PATCH_TYPE3 equals
+   PATCH_TYPE1 | PATCH_TYPE2), so they have to be compared after masking, not
+   tested bit by bit. */
+#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#      define IS_COND          0x04
+#      define IS_BL            0x08
+       /* conditional + imm8 */
+#      define PATCH_TYPE1      0x10
+       /* conditional + imm20 */
+#      define PATCH_TYPE2      0x20
+       /* IT + imm24 */
+#      define PATCH_TYPE3      0x30
+       /* imm11 */
+#      define PATCH_TYPE4      0x40
+       /* imm24 */
+#      define PATCH_TYPE5      0x50
+       /* BL + imm24 */
+#      define PATCH_BL         0x60
+       /* 0xf00 cc code for branches */
+#endif
+
+/* ARM64 flag values: one distinct bit each, starting above JUMP_LABEL / JUMP_ADDR. */
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#      define IS_COND          0x004
+#      define IS_CBZ           0x008
+#      define IS_BL            0x010
+#      define PATCH_B          0x020
+#      define PATCH_COND       0x040
+#      define PATCH_ABS48      0x080
+#      define PATCH_ABS64      0x100
+#endif
+
+/* PowerPC flag values; PATCH_ABS32 and PATCH_ABS48 are defined on PPC64 only. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#      define IS_COND          0x004
+#      define IS_CALL          0x008
+#      define PATCH_B          0x010
+#      define PATCH_ABS_B      0x020
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#      define PATCH_ABS32      0x040
+#      define PATCH_ABS48      0x080
+#endif
+#      define REMOVE_COND      0x100
+#endif
+
+/* MIPS flag values; PATCH_ABS32 and PATCH_ABS48 are defined on MIPS64 only. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#      define IS_MOVABLE       0x004
+#      define IS_JAL           0x008
+#      define IS_CALL          0x010
+#      define IS_BIT26_COND    0x020
+#      define IS_BIT16_COND    0x040
+
+       /* Union of both conditional-branch bits. */
+#      define IS_COND          (IS_BIT26_COND | IS_BIT16_COND)
+
+#      define PATCH_B          0x080
+#      define PATCH_J          0x100
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#      define PATCH_ABS32      0x200
+#      define PATCH_ABS48      0x400
+#endif
+
+       /* instruction types */
+       /* sljit_create_compiler() initializes compiler->delay_slot to UNMOVABLE_INS. */
+#      define MOVABLE_INS      0
+       /* 1 - 31 last destination register */
+       /* no destination (i.e: store) */
+#      define UNMOVABLE_INS    32
+       /* FPU status register */
+#      define FCSR_FCC         33
+#endif
+
+/* TileGX flag values. */
+#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+#      define IS_JAL           0x04
+#      define IS_COND          0x08
+
+#      define PATCH_B          0x10
+#      define PATCH_J          0x20
+#endif
+
+/* SPARC32 flag values and instruction-type constants. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#      define IS_MOVABLE       0x04
+#      define IS_COND          0x08
+#      define IS_CALL          0x10
+
+#      define PATCH_B          0x20
+#      define PATCH_CALL       0x40
+
+       /* instruction types */
+       /* sljit_create_compiler() initializes compiler->delay_slot to UNMOVABLE_INS. */
+#      define MOVABLE_INS      0
+       /* 1 - 31 last destination register */
+       /* no destination (i.e: store) */
+#      define UNMOVABLE_INS    32
+
+       /* Low byte mask. NOTE(review): presumably selects the instruction-type value
+          (0..32) from a word that also carries the *_IS_SET bits below - confirm
+          in the SPARC backend. */
+#      define DST_INS_MASK     0xff
+
+       /* ICC_SET is the same as SET_FLAGS. */
+#      define ICC_IS_SET       (1 << 23)
+#      define FCC_IS_SET       (1 << 24)
+#endif
+
+/* Per-architecture configuration of the displacement that ADJUST_LOCAL_OFFSET()
+   adds to offsets relative to SLJIT_LOCALS_REG. An architecture selects either a
+   per-compiler value (SLJIT_HAS_VARIABLE_LOCALS_OFFSET, read from
+   compiler->locals_offset) or a compile-time constant (SLJIT_HAS_FIXED_LOCALS_OFFSET
+   together with FIXED_LOCALS_OFFSET). */
+
+/* x86-32 uses the variable offset; FIXED_LOCALS_OFFSET is additionally defined
+   when SLJIT_X86_32_FASTCALL is off, but ADJUST_LOCAL_OFFSET() below does not use
+   it on this architecture because the variable case is tested first. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1
+#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw))
+#endif
+#endif
+
+/* x86-64: the constant differs between _WIN64 and other targets. */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#ifdef _WIN64
+#define FIXED_LOCALS_OFFSET ((4 + 2) * sizeof(sljit_sw))
+#else
+#define FIXED_LOCALS_OFFSET (sizeof(sljit_sw))
+#endif
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1
+#endif
+
+/* PPC32: the constant differs between _AIX and other targets. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#ifdef _AIX
+#define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw))
+#else
+#define FIXED_LOCALS_OFFSET (2 * sizeof(sljit_sw))
+#endif
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw))
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_sw))
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET 0
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET (23 * sizeof(sljit_sw))
+#endif
+
+/* ADJUST_LOCAL_OFFSET(p, i): when operand p is exactly SLJIT_MEM1(SLJIT_LOCALS_REG),
+   add the architecture's locals displacement to the lvalue i; otherwise do nothing.
+   The variable form needs a "compiler" pointer in scope.
+   NOTE(review): the non-empty forms expand to a bare "if" statement that already
+   ends in ';', so they must not be used as the unbraced body of an if/else. */
+#if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET)
+
+#define ADJUST_LOCAL_OFFSET(p, i) \
+       if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+               (i) += compiler->locals_offset;
+
+#elif (defined SLJIT_HAS_FIXED_LOCALS_OFFSET && SLJIT_HAS_FIXED_LOCALS_OFFSET)
+
+#define ADJUST_LOCAL_OFFSET(p, i) \
+       if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+               (i) += FIXED_LOCALS_OFFSET;
+
+#else
+
+/* No displacement configured for this architecture: expands to nothing. */
+#define ADJUST_LOCAL_OFFSET(p, i)
+
+#endif
+
+#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */
+
+/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */
+#include "sljitUtils.c"
+
+#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+#include "sljitExecAllocator.c"
+#endif
+
+#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) && !(defined SLJIT_SSE2 && SLJIT_SSE2)
+#error SLJIT_SSE2_AUTO cannot be enabled without SLJIT_SSE2
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Public functions                                                     */
+/* --------------------------------------------------------------------- */
+
+/* One-time global setup is required on ARM v5, and on x86-32 / x86-64 when
+   SLJIT_SSE2 is enabled. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || ((defined SLJIT_SSE2 && SLJIT_SSE2) && ((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)))
+#define SLJIT_NEEDS_COMPILER_INIT 1
+/* Set to 1 by sljit_create_compiler() after the first init_compiler() call. */
+static sljit_si compiler_initialized = 0;
+/* A thread safe initialization. */
+/* NOTE(review): the flag above is read and written without any locking in
+   sljit_create_compiler(), so the thread safety claimed here has to come from
+   init_compiler() itself (defined elsewhere) - confirm. */
+static void init_compiler(void);
+#endif
+
+/* Allocates a zero-initialized compiler together with its first buf / abuf
+   fragments (and the constant pool on ARM v5). Returns NULL when any of the
+   allocations fails; everything obtained up to that point is released first. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void)
+{
+       struct sljit_compiler *compiler;
+
+       compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler));
+       if (compiler == NULL)
+               return NULL;
+       SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler));
+
+       /* Build-time checks of the integer type sizes and of the flag constants. */
+       SLJIT_COMPILE_ASSERT(
+               sizeof(sljit_sb) == 1 && sizeof(sljit_ub) == 1
+               && sizeof(sljit_sh) == 2 && sizeof(sljit_uh) == 2
+               && sizeof(sljit_si) == 4 && sizeof(sljit_ui) == 4
+               && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8)
+               && sizeof(sljit_p) <= sizeof(sljit_sw)
+               && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8)
+               && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8),
+               invalid_integer_types);
+       SLJIT_COMPILE_ASSERT(SLJIT_INT_OP == SLJIT_SINGLE_OP,
+               int_op_and_single_op_must_be_the_same);
+       SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_SINGLE_OP,
+               rewritable_jump_and_single_op_must_not_be_the_same);
+
+       /* The structure was zeroed above, so only the remaining members are assigned. */
+       compiler->error = SLJIT_SUCCESS;
+
+       compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE);
+       compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE);
+
+       if (compiler->buf == NULL || compiler->abuf == NULL) {
+               /* Release whichever of the two fragments was obtained. */
+               if (compiler->buf != NULL)
+                       SLJIT_FREE(compiler->buf);
+               if (compiler->abuf != NULL)
+                       SLJIT_FREE(compiler->abuf);
+               SLJIT_FREE(compiler);
+               return NULL;
+       }
+
+       /* Both fragment lists start with a single, empty fragment. */
+       compiler->buf->used_size = 0;
+       compiler->buf->next = NULL;
+       compiler->abuf->used_size = 0;
+       compiler->abuf->next = NULL;
+
+       /* -1 is the value the debug checks compare against before using registers. */
+       compiler->saveds = -1;
+       compiler->scratches = -1;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       compiler->args = -1;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       /* One allocation: CPOOL_SIZE words followed by CPOOL_SIZE bytes. */
+       compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + CPOOL_SIZE * sizeof(sljit_ub));
+       if (compiler->cpool == NULL) {
+               SLJIT_FREE(compiler->buf);
+               SLJIT_FREE(compiler->abuf);
+               SLJIT_FREE(compiler);
+               return NULL;
+       }
+       compiler->cpool_unique = (sljit_ub*)(compiler->cpool + CPOOL_SIZE);
+       compiler->cpool_diff = 0xffffffff;
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       compiler->delay_slot = UNMOVABLE_INS;
+#endif
+
+#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT)
+       /* Run the global setup the first time a compiler is created. */
+       if (!compiler_initialized) {
+               init_compiler();
+               compiler_initialized = 1;
+       }
+#endif
+
+       return compiler;
+}
+
+/* Releases every fragment of the buf and abuf lists, the constant pool on ARM v5,
+   and finally the compiler structure itself. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *frag;
+       struct sljit_memory_fragment *next_frag;
+
+       /* The successor is read before each fragment is freed. */
+       for (frag = compiler->buf; frag; frag = next_frag) {
+               next_frag = frag->next;
+               SLJIT_FREE(frag);
+       }
+
+       for (frag = compiler->abuf; frag; frag = next_frag) {
+               next_frag = frag->next;
+               SLJIT_FREE(frag);
+       }
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       SLJIT_FREE(compiler->cpool);
+#endif
+       SLJIT_FREE(compiler);
+}
+
+/* Releases generated code through SLJIT_FREE_EXEC. The pointer is first mapped back
+   to the allocated address where the configuration requires it. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
+{
+#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+       /* Remove thumb mode flag (the lowest address bit). */
+       SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1));
+#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+       /* Resolve indirection: code points at a word holding the real address. */
+       SLJIT_FREE_EXEC((void*)(*(sljit_uw*)code));
+#else
+       SLJIT_FREE_EXEC(code);
+#endif
+}
+
+/* Makes jump target the given label: JUMP_ADDR is cleared, JUMP_LABEL is set and
+   the label is stored in jump->u.label. Does nothing if either pointer is NULL. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label)
+{
+       if (SLJIT_UNLIKELY(!jump) || SLJIT_UNLIKELY(!label))
+               return;
+
+       jump->flags = (jump->flags & ~JUMP_ADDR) | JUMP_LABEL;
+       jump->u.label = label;
+}
+
+/* Makes jump target a fixed address: JUMP_LABEL is cleared, JUMP_ADDR is set and
+   the address is stored in jump->u.target. Does nothing if jump is NULL. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target)
+{
+       if (SLJIT_UNLIKELY(!jump))
+               return;
+
+       jump->flags = (jump->flags & ~JUMP_LABEL) | JUMP_ADDR;
+       jump->u.target = target;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Private functions                                                    */
+/* --------------------------------------------------------------------- */
+
+/* Reserves size bytes in the compiler->buf fragment list and returns their address.
+   When the head fragment cannot hold the request, a new BUF_SIZE fragment becomes
+   the head. Returns NULL, with compiler->error set to SLJIT_ERR_ALLOC_FAILED,
+   if that allocation fails. */
+static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size)
+{
+       /* Payload bytes available in one fragment (allocation size minus the header). */
+       sljit_uw capacity = BUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory);
+       struct sljit_memory_fragment *frag = compiler->buf;
+       sljit_ub *ret;
+
+       SLJIT_ASSERT(size <= 256);
+       if (frag->used_size + size > capacity) {
+               /* Head fragment is full: push a fresh one in front of the list. */
+               frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE);
+               PTR_FAIL_IF_NULL(frag);
+               frag->next = compiler->buf;
+               compiler->buf = frag;
+               frag->used_size = size;
+               return frag->memory;
+       }
+
+       ret = frag->memory + frag->used_size;
+       frag->used_size += size;
+       return ret;
+}
+
+/* Reserves size bytes in the compiler->abuf fragment list and returns their address.
+   When the head fragment cannot hold the request, a new ABUF_SIZE fragment becomes
+   the head. Returns NULL, with compiler->error set to SLJIT_ERR_ALLOC_FAILED,
+   if that allocation fails. */
+static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size)
+{
+       /* Payload bytes available in one fragment (allocation size minus the header). */
+       sljit_uw capacity = ABUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory);
+       struct sljit_memory_fragment *frag = compiler->abuf;
+       sljit_ub *ret;
+
+       SLJIT_ASSERT(size <= 256);
+       if (frag->used_size + size > capacity) {
+               /* Head fragment is full: push a fresh one in front of the list. */
+               frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE);
+               PTR_FAIL_IF_NULL(frag);
+               frag->next = compiler->abuf;
+               compiler->abuf = frag;
+               frag->used_size = size;
+               return frag->memory;
+       }
+
+       ret = frag->memory + frag->used_size;
+       frag->used_size += size;
+       return ret;
+}
+
+/* Hands out a small block from the compiler's abuf list. Returns NULL when the
+   compiler already carries an error, when size is not in 1..128 (64-bit) or
+   1..64 (32-bit), or when ensure_abuf() fails. The size is rounded up to a
+   multiple of 8 (64-bit) or 4 (32-bit) before the reservation. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size)
+{
+       sljit_si max_size;
+       sljit_si align_mask;
+
+       CHECK_ERROR_PTR();
+
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+       max_size = 128;
+       align_mask = 7;
+#else
+       max_size = 64;
+       align_mask = 3;
+#endif
+
+       if (size <= 0 || size > max_size)
+               return NULL;
+
+       size = (size + align_mask) & ~align_mask;
+       return ensure_abuf(compiler, size);
+}
+
+/* Reverses the singly linked compiler->buf fragment list in place. The list must
+   contain at least one fragment: the head is dereferenced unconditionally. */
+static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *frag = compiler->buf;
+       struct sljit_memory_fragment *reversed = NULL;
+       struct sljit_memory_fragment *rest;
+
+       for (;;) {
+               /* Detach frag from the remaining list and push it onto the reversed one. */
+               rest = frag->next;
+               frag->next = reversed;
+               reversed = frag;
+               if (rest == NULL)
+                       break;
+               frag = rest;
+       }
+
+       compiler->buf = reversed;
+}
+
+/* Stamps label with the current compiler->size and appends it to the compiler's
+   label list (compiler->labels is the head, compiler->last_label the tail). */
+static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler)
+{
+       struct sljit_label *tail = compiler->last_label;
+
+       label->next = NULL;
+       label->size = compiler->size;
+
+       if (tail == NULL)
+               compiler->labels = label;
+       else
+               tail->next = label;
+
+       compiler->last_label = label;
+}
+
+/* Stores flags in jump and appends it to the compiler's jump list
+   (compiler->jumps is the head, compiler->last_jump the tail). */
+static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_si flags)
+{
+       struct sljit_jump *tail = compiler->last_jump;
+
+       jump->next = NULL;
+       jump->flags = flags;
+
+       if (tail == NULL)
+               compiler->jumps = jump;
+       else
+               tail->next = jump;
+
+       compiler->last_jump = jump;
+}
+
+/* Records the current compiler->size in const_->addr and appends const_ to the
+   compiler's constant list (compiler->consts is the head, compiler->last_const
+   the tail). */
+static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler)
+{
+       struct sljit_const *tail = compiler->last_const;
+
+       const_->next = NULL;
+       const_->addr = compiler->size;
+
+       if (tail == NULL)
+               compiler->consts = const_;
+       else
+               tail->next = const_;
+
+       compiler->last_const = const_;
+}
+
+/* Non-zero when exp is a memory operand (SLJIT_MEM set) whose base register field
+   (exp & REG_MASK) or offset register field (OFFS_REG(exp)) equals reg.
+   reg is parenthesized so that an expression argument is compared as a whole
+   instead of being split by operator precedence. */
+#define ADDRESSING_DEPENDS_ON(exp, reg) \
+       (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == (reg) || OFFS_REG(exp) == (reg)))
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+/* Debug-only check of the modifier bits carried by the variable "op" of the
+   expanding function. SLJIT_KEEP_FLAGS must not be combined with any SLJIT_SET_*
+   bit; the switch then asserts, per opcode, which SLJIT_SET_* bits are forbidden:
+     NOT, CLZ, AND, OR, XOR, SHL, LSHR, ASHR : only SET_E may be set
+     NEG                                      : SET_U, SET_S, SET_C forbidden
+     MUL                                      : only SET_O may be set
+     CMPD                                     : SET_E or SET_S required; no KEEP_FLAGS
+     ADD                                      : SET_U, SET_S forbidden
+     SUB                                      : no restriction
+     ADDC, SUBC                               : only SET_C may be set
+     listed op0 / MOV / MOVU opcodes          : no modifier bit at all
+     everything else                          : only SLJIT_INT_OP / SLJIT_SINGLE_OP */
+#define FUNCTION_CHECK_OP() \
+       SLJIT_ASSERT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
+       switch (GET_OPCODE(op)) { \
+       case SLJIT_NOT: \
+       case SLJIT_CLZ: \
+       case SLJIT_AND: \
+       case SLJIT_OR: \
+       case SLJIT_XOR: \
+       case SLJIT_SHL: \
+       case SLJIT_LSHR: \
+       case SLJIT_ASHR: \
+               SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \
+               break; \
+       case SLJIT_NEG: \
+               SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
+               break; \
+       case SLJIT_MUL: \
+               SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
+               break; \
+       case SLJIT_CMPD: \
+               SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+               SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
+               break; \
+       case SLJIT_ADD: \
+               SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \
+               break; \
+       case SLJIT_SUB: \
+               break; \
+       case SLJIT_ADDC: \
+       case SLJIT_SUBC: \
+               SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \
+               break; \
+       case SLJIT_BREAKPOINT: \
+       case SLJIT_NOP: \
+       case SLJIT_UMUL: \
+       case SLJIT_SMUL: \
+       case SLJIT_MOV: \
+       case SLJIT_MOV_UI: \
+       case SLJIT_MOV_P: \
+       case SLJIT_MOVU: \
+       case SLJIT_MOVU_UI: \
+       case SLJIT_MOVU_P: \
+               /* Nothing allowed */ \
+               SLJIT_ASSERT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+               break; \
+       default: \
+               /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \
+               SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+               break; \
+       }
+
+/* Non-zero when r is SLJIT_UNUSED, or one of the first compiler->scratches scratch
+   registers, or one of the first compiler->saveds saved registers. Reads the
+   "compiler" variable of the expanding function. */
+#define FUNCTION_CHECK_IS_REG(r) \
+       ((r) == SLJIT_UNUSED || \
+       ((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \
+       ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds))
+
+/* Debug-only validation of a source operand (p, i). Asserts first that the register
+   counts have been set (both differ from -1), then accepts:
+     - a register: i must be 0 and p must not be SLJIT_UNUSED;
+     - SLJIT_IMM: any i;
+     - SLJIT_MEM1(SLJIT_LOCALS_REG): 0 <= i < compiler->logical_local_size;
+     - any other memory operand: base (and offset) register must be valid, i must
+       be 0..3 when an offset register is present, and no bits outside
+       SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK may be set.
+   Anything else triggers SLJIT_ASSERT_STOP(). Expands to an if/else chain that
+   already ends in ';'. */
+#define FUNCTION_CHECK_SRC(p, i) \
+       SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \
+       if (FUNCTION_CHECK_IS_REG(p)) \
+               SLJIT_ASSERT((i) == 0 && (p) != SLJIT_UNUSED); \
+       else if ((p) == SLJIT_IMM) \
+               ; \
+       else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+               SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
+       else if ((p) & SLJIT_MEM) { \
+               SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
+               if ((p) & OFFS_REG_MASK) { \
+                       SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+                       SLJIT_ASSERT(!((i) & ~0x3)); \
+               } \
+               SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+       } \
+       else \
+               SLJIT_ASSERT_STOP();
+
+/* Debug-only validation of a destination operand (p, i). Same rules as
+   FUNCTION_CHECK_SRC with two differences: SLJIT_UNUSED is accepted as a register
+   operand (with i == 0), and a bare SLJIT_IMM is not accepted. Expands to an
+   if/else chain that already ends in ';'. */
+#define FUNCTION_CHECK_DST(p, i) \
+       SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \
+       if (FUNCTION_CHECK_IS_REG(p)) \
+               SLJIT_ASSERT((i) == 0); \
+       else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+               SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
+       else if ((p) & SLJIT_MEM) { \
+               SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
+               if ((p) & OFFS_REG_MASK) { \
+                       SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+                       SLJIT_ASSERT(!((i) & ~0x3)); \
+               } \
+               SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+       } \
+       else \
+               SLJIT_ASSERT_STOP();
+
+/* Debug-only validation of a floating point operand (p, i). Accepts:
+     - SLJIT_FLOAT_REG1..SLJIT_FLOAT_REG6 with i == 0;
+     - a memory operand whose base register is valid; with an offset register that
+       register must be valid, must not be SLJIT_LOCALS_REG, and i must be 0..3;
+       no bits outside SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK may be set.
+   Anything else triggers SLJIT_ASSERT_STOP(). The parameter i is parenthesized at
+   every use, matching FUNCTION_CHECK_SRC / FUNCTION_CHECK_DST, so an expression
+   argument cannot be split by operator precedence. Expands to an if/else chain
+   that already ends in ';'. */
+#define FUNCTION_FCHECK(p, i) \
+       if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \
+               SLJIT_ASSERT((i) == 0); \
+       else if ((p) & SLJIT_MEM) { \
+               SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
+               if ((p) & OFFS_REG_MASK) { \
+                       SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+                       SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_LOCALS_REG) && !((i) & ~0x3)); \
+               } else \
+                       SLJIT_ASSERT(OFFS_REG(p) == 0); \
+               SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+       } \
+       else \
+               SLJIT_ASSERT_STOP();
+
+/* Debug-only extra checks for the SLJIT_MOVU..SLJIT_MOVU_P opcodes. Uses the
+   variables "op", "src" and "dst" of the expanding function. Asserts that neither
+   memory operand is based on SLJIT_LOCALS_REG and, when src is a memory operand
+   with a base register, that dst uses that register neither as its register /
+   base field nor as its offset register. */
+#define FUNCTION_CHECK_OP1() \
+       if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \
+               SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_LOCALS_REG); \
+               SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_LOCALS_REG); \
+               if ((src & SLJIT_MEM) && (src & REG_MASK)) \
+                       SLJIT_ASSERT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \
+       }
+
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+
+/* Stores the stream in compiler->verbose. The check_sljit_* functions print a
+   trace line to it whenever it is non-NULL. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose)
+{
+       compiler->verbose = verbose;
+}
+
+/* Printable names of the integer registers for verbose output. The verbose macros
+   index this table with the register number (index 0 is the unused operand). */
+static char* reg_names[] = {
+       (char*)"unused", (char*)"s1", (char*)"s2", (char*)"s3",
+       (char*)"se1", (char*)"se2", (char*)"p1", (char*)"p2",
+       (char*)"p3", (char*)"pe1", (char*)"pe2", (char*)"lc"
+};
+
+/* Printable names of the floating point registers, indexed the same way by
+   sljit_verbose_fparam(). */
+static char* freg_names[] = {
+       (char*)"unused", (char*)"f1", (char*)"f2", (char*)"f3",
+       (char*)"f4", (char*)"f5", (char*)"f6"
+};
+
+/* printf length modifier spliced in front of "d" by the verbose macros when
+   printing word-sized values: "I64" on _WIN64, "l" on other 64-bit targets,
+   empty on 32-bit targets. */
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#ifdef _WIN64
+#      define SLJIT_PRINT_D    "I64"
+#else
+#      define SLJIT_PRINT_D    "l"
+#endif
+#else
+#      define SLJIT_PRINT_D    ""
+#endif
+
+/* Prints operand (p, i) to compiler->verbose:
+     immediate                      -> #i
+     memory, base + offset register -> [base + offs * (1 << i)] when i != 0,
+                                       [base + offs] when i == 0
+     memory, base only              -> [base + #i] when i != 0, [base] when i == 0
+     memory, no base register       -> [#i]
+     otherwise                      -> the register name from reg_names[]
+   Expands to an if/else chain that already ends in ';'; p and i are evaluated
+   more than once. */
+#define sljit_verbose_param(p, i) \
+       if ((p) & SLJIT_IMM) \
+               fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \
+       else if ((p) & SLJIT_MEM) { \
+               if ((p) & REG_MASK) { \
+                       if (i) { \
+                               if ((p) & OFFS_REG_MASK) \
+                                       fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \
+                               else \
+                                       fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \
+                       } \
+                       else { \
+                               if ((p) & OFFS_REG_MASK) \
+                                       fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \
+                               else \
+                                       fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \
+                       } \
+               } \
+               else \
+                       fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
+       } else \
+               fprintf(compiler->verbose, "%s", reg_names[p]);
+/* Floating point counterpart of sljit_verbose_param: memory operands are printed
+   identically (their address registers come from reg_names[]), there is no
+   immediate case, and a non-memory operand is looked up in freg_names[]. */
+#define sljit_verbose_fparam(p, i) \
+       if ((p) & SLJIT_MEM) { \
+               if ((p) & REG_MASK) { \
+                       if (i) { \
+                               if ((p) & OFFS_REG_MASK) \
+                                       fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \
+                               else \
+                                       fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \
+                       } \
+                       else { \
+                               if ((p) & OFFS_REG_MASK) \
+                                       fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \
+                               else \
+                                       fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \
+                       } \
+               } \
+               else \
+                       fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
+       } else \
+               fprintf(compiler->verbose, "%s", freg_names[p]);
+
+/* Printable operation names for verbose output, indexed directly by opcode value
+   (check_sljit_emit_return() prints op_names[op]). NOTE(review): the entry order
+   therefore has to follow the numeric order of the SLJIT_* opcode constants,
+   which are defined outside this file - confirm when adding opcodes. */
+static SLJIT_CONST char* op_names[] = {
+       /* op0 */
+       (char*)"breakpoint", (char*)"nop",
+       (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv",
+       /* op1 */
+       (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh",
+       (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"mov.p",
+       (char*)"movu", (char*)"movu.ub", (char*)"movu.sb", (char*)"movu.uh",
+       (char*)"movu.sh", (char*)"movu.ui", (char*)"movu.si", (char*)"movu.p",
+       (char*)"not", (char*)"neg", (char*)"clz",
+       /* op2 */
+       (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc",
+       (char*)"mul", (char*)"and", (char*)"or", (char*)"xor",
+       (char*)"shl", (char*)"lshr", (char*)"ashr",
+       /* fop1 */
+       (char*)"cmp", (char*)"mov", (char*)"neg", (char*)"abs",
+       /* fop2 */
+       (char*)"add", (char*)"sub", (char*)"mul", (char*)"div"
+};
+
+/* Printable names of the jump / condition types for verbose output.
+   NOTE(review): presumably indexed by the jump type constant, so the order has to
+   follow the numeric order of those constants - confirm at the places it is used. */
+static char* jump_names[] = {
+       (char*)"equal", (char*)"not_equal",
+       (char*)"less", (char*)"greater_equal",
+       (char*)"greater", (char*)"less_equal",
+       (char*)"sig_less", (char*)"sig_greater_equal",
+       (char*)"sig_greater", (char*)"sig_less_equal",
+       (char*)"overflow", (char*)"not_overflow",
+       (char*)"mul_overflow", (char*)"mul_not_overflow",
+       (char*)"float_equal", (char*)"float_not_equal",
+       (char*)"float_less", (char*)"float_greater_equal",
+       (char*)"float_greater", (char*)"float_less_equal",
+       (char*)"float_unordered", (char*)"float_ordered",
+       (char*)"jump", (char*)"fast_call",
+       (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3"
+};
+
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Arch dependent                                                       */
+/* --------------------------------------------------------------------- */
+
+/* Sanity checks run before code generation: something must have been emitted
+   (compiler->size > 0) and, in debug builds, every recorded jump must already
+   have a label or an address target. */
+static SLJIT_INLINE void check_sljit_generate_code(struct sljit_compiler *compiler)
+{
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       struct sljit_jump *jump;
+#endif
+       /* Without debug and verbose support the argument is otherwise unused. */
+       SLJIT_UNUSED_ARG(compiler);
+
+       SLJIT_ASSERT(compiler->size > 0);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       for (jump = compiler->jumps; jump; jump = jump->next) {
+               /* All jumps have target. */
+               SLJIT_ASSERT(jump->flags & (JUMP_LABEL | JUMP_ADDR));
+       }
+#endif
+}
+
+/* Validates the arguments of a function-entry request and, when a verbose stream
+   is set, prints them. Asserted ranges: 0 <= args <= 3,
+   0 <= scratches <= SLJIT_NO_TMP_REGISTERS, 0 <= saveds <= SLJIT_NO_GEN_REGISTERS,
+   args <= saveds, 0 <= local_size <= SLJIT_MAX_LOCAL_SIZE. */
+static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(scratches);
+       SLJIT_UNUSED_ARG(saveds);
+       SLJIT_UNUSED_ARG(local_size);
+
+       SLJIT_ASSERT(args >= 0 && args <= 3);
+       SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS);
+       SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS);
+       /* Every argument needs a saved register. */
+       SLJIT_ASSERT(args <= saveds);
+       SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "  enter args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size);
+#endif
+}
+
+/* Validates the arguments of a set-context request with the same range assertions
+   as check_sljit_emit_enter() and, when a verbose stream is set, prints them.
+   When compiler->skip_checks is set (debug or verbose builds only), the flag is
+   cleared and the function returns without checking or printing anything. */
+static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(scratches);
+       SLJIT_UNUSED_ARG(saveds);
+       SLJIT_UNUSED_ARG(local_size);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* One-shot bypass: consume the flag and skip this single check. */
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               return;
+       }
+#endif
+
+       SLJIT_ASSERT(args >= 0 && args <= 3);
+       SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS);
+       SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS);
+       /* Every argument needs a saved register. */
+       SLJIT_ASSERT(args <= saveds);
+       SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "  set_context args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size);
+#endif
+}
+
+/* Checks the operands of a return and optionally traces it.
+   Debug builds: with op == SLJIT_UNUSED both src and srcw must be zero;
+   otherwise op must lie in SLJIT_MOV..SLJIT_MOV_P and the source operand is
+   passed through FUNCTION_CHECK_SRC.
+   Verbose builds: writes "return" (followed by the op name and the source
+   operand when op is used) to compiler->verbose if that stream is set. */
+static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       /* None of the arguments is referenced when debug and verbose are off. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       if (op == SLJIT_UNUSED) {
+               /* No return value: the source operand must be left empty. */
+               SLJIT_ASSERT(src == 0 && srcw == 0);
+       }
+       else {
+               SLJIT_ASSERT(op >= SLJIT_MOV && op <= SLJIT_MOV_P);
+               FUNCTION_CHECK_SRC(src, srcw);
+       }
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               if (op != SLJIT_UNUSED) {
+                       fprintf(compiler->verbose, "  return %s ", op_names[op]);
+                       sljit_verbose_param(src, srcw);
+                       fprintf(compiler->verbose, "\n");
+               }
+               else
+                       fprintf(compiler->verbose, "  return\n");
+       }
+#endif
+}
+
+/* Checks the destination operand of a "fast_enter" (debug builds, through
+   FUNCTION_CHECK_DST) and, in verbose builds, writes a "fast_enter" trace
+   line with the operand to compiler->verbose when that stream is set. */
+static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  fast_enter ");
+               sljit_verbose_param(dst, dstw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+/* Checks the source operand of a "fast_return" (debug builds, through
+   FUNCTION_CHECK_SRC) and, in verbose builds, writes a "fast_return" trace
+   line with the operand to compiler->verbose when that stream is set. */
+static SLJIT_INLINE void check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_SRC(src, srcw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  fast_return ");
+               sljit_verbose_param(src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+
+       SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL)
+               || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV));
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "  %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)]);
+#endif
+}
+
+/* Checks and optionally traces a one-source instruction.
+   When compiler->skip_checks is set (debug or verbose builds only) the flag
+   is cleared and nothing else happens. Otherwise the opcode must lie in
+   SLJIT_MOV..SLJIT_CLZ, debug builds run the FUNCTION_CHECK_* macros on the
+   opcode and both operands, and verbose builds print the instruction name
+   with its flag suffixes followed by the destination and source operands. */
+static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* One-shot flag: consume it so that the next call is checked again. */
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               return;
+       }
+#endif
+
+       SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_OP();
+       FUNCTION_CHECK_SRC(src, srcw);
+       FUNCTION_CHECK_DST(dst, dstw);
+       FUNCTION_CHECK_OP1();
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               /* Format: [i]<name>[.e][.u][.s][.o][.c][.k] <dst>, <src> */
+               fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)],
+                       !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
+                       !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+               sljit_verbose_param(dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+/* Checks and optionally traces a two-source instruction.
+   When compiler->skip_checks is set (debug or verbose builds only) the flag
+   is cleared and nothing else happens. Otherwise the opcode must lie in
+   SLJIT_ADD..SLJIT_ASHR, debug builds run the FUNCTION_CHECK_* macros on the
+   opcode and the three operands, and verbose builds print the instruction
+   name with its flag suffixes followed by dst, src1 and src2. */
+static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* One-shot flag: consume it so that the next call is checked again. */
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               return;
+       }
+#endif
+
+       SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_OP();
+       FUNCTION_CHECK_SRC(src1, src1w);
+       FUNCTION_CHECK_SRC(src2, src2w);
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               /* Format: [i]<name>[.e][.u][.s][.o][.c][.k] <dst>, <src1>, <src2> */
+               fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)],
+                       !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
+                       !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+               sljit_verbose_param(dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+/* Asserts that reg is in the range 1..SLJIT_NO_REGISTERS. */
+static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg)
+{
+       SLJIT_UNUSED_ARG(reg);
+       SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS);
+}
+
+/* Asserts that reg is in the range 1..SLJIT_NO_FLOAT_REGISTERS. */
+static SLJIT_INLINE void check_sljit_get_float_register_index(sljit_si reg)
+{
+       SLJIT_UNUSED_ARG(reg);
+       SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_FLOAT_REGISTERS);
+}
+
+/* Asserts that the instruction pointer is non-NULL; compiler and size are
+   not inspected here. */
+static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(instruction);
+       SLJIT_UNUSED_ARG(size);
+       SLJIT_ASSERT(instruction);
+}
+
+/* Checks and optionally traces a one-source floating point instruction.
+   When compiler->skip_checks is set (debug or verbose builds only) the flag
+   is cleared and nothing else happens. Otherwise sljit_is_fpu_available()
+   must be true and the opcode must lie in SLJIT_CMPD..SLJIT_ABSD; debug
+   builds additionally run FUNCTION_CHECK_OP and FUNCTION_FCHECK on both
+   operands, and verbose builds print the instruction and its operands. */
+static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* One-shot flag: consume it so that the next call is checked again. */
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               return;
+       }
+#endif
+
+       SLJIT_ASSERT(sljit_is_fpu_available());
+       SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CMPD && GET_OPCODE(op) <= SLJIT_ABSD);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_OP();
+       FUNCTION_FCHECK(src, srcw);
+       FUNCTION_FCHECK(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               /* The name gets an "s" suffix with SLJIT_SINGLE_OP and "d" without. */
+               fprintf(compiler->verbose, "  %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d",
+                       !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s");
+               sljit_verbose_fparam(dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+/* Checks and optionally traces a two-source floating point instruction.
+   sljit_is_fpu_available() must be true and the opcode must lie in
+   SLJIT_ADDD..SLJIT_DIVD; debug builds additionally run FUNCTION_CHECK_OP
+   and FUNCTION_FCHECK on the three operands, and verbose builds print the
+   instruction and its operands. Unlike some sibling checkers, this one does
+   not consult compiler->skip_checks. */
+static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+
+       SLJIT_ASSERT(sljit_is_fpu_available());
+       SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADDD && GET_OPCODE(op) <= SLJIT_DIVD);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_OP();
+       FUNCTION_FCHECK(src1, src1w);
+       FUNCTION_FCHECK(src2, src2w);
+       FUNCTION_FCHECK(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               /* The name gets an "s" suffix with SLJIT_SINGLE_OP and "d" without. */
+               fprintf(compiler->verbose, "  %s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d");
+               sljit_verbose_fparam(dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+/* Performs no validation; in verbose builds it writes "label:" to
+   compiler->verbose when that stream is set. */
+static SLJIT_INLINE void check_sljit_emit_label(struct sljit_compiler *compiler)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "label:\n");
+#endif
+}
+
+/* Checks the type of a jump and optionally traces it.
+   When compiler->skip_checks is set (debug or verbose builds only) the flag
+   is cleared and nothing else happens. Otherwise type may only use its low
+   eight bits plus SLJIT_REWRITABLE_JUMP, and the low eight bits must lie in
+   SLJIT_C_EQUAL..SLJIT_CALL3. */
+static SLJIT_INLINE void check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       /* Neither argument is referenced when debug and verbose are off. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* One-shot flag: consume it so that the next call is checked again. */
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               return;
+       }
+#endif
+
+       SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP)));
+       SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_CALL3);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               /* A ".r" suffix marks jumps carrying SLJIT_REWRITABLE_JUMP. */
+               const char *rewritable = (type & SLJIT_REWRITABLE_JUMP) ? ".r" : "";
+
+               fprintf(compiler->verbose, "  jump%s.%s\n", rewritable, jump_names[type & 0xff]);
+       }
+#endif
+}
+
+/* Checks and optionally traces an integer compare-and-jump.
+   type may only use its low eight bits plus SLJIT_REWRITABLE_JUMP and
+   SLJIT_INT_OP, and the low eight bits must lie in
+   SLJIT_C_EQUAL..SLJIT_C_SIG_LESS_EQUAL. Debug builds run FUNCTION_CHECK_SRC
+   on both operands; verbose builds print a "cmp" line with both operands. */
+static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+
+       SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP)));
+       SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_C_SIG_LESS_EQUAL);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_SRC(src1, src1w);
+       FUNCTION_CHECK_SRC(src2, src2w);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               /* Format: [i]cmp[.r].<condition> <src1>, <src2> */
+               fprintf(compiler->verbose, "  %scmp%s.%s ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
+               sljit_verbose_param(src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+/* Checks and optionally traces a floating point compare-and-jump.
+   sljit_is_fpu_available() must be true, type may only use its low eight
+   bits plus SLJIT_REWRITABLE_JUMP and SLJIT_SINGLE_OP, and the low eight
+   bits must lie in SLJIT_C_FLOAT_EQUAL..SLJIT_C_FLOAT_ORDERED. Debug builds
+   run FUNCTION_FCHECK on both operands; verbose builds print a "cmp" line
+   with both operands. */
+static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+
+       SLJIT_ASSERT(sljit_is_fpu_available());
+       SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_SINGLE_OP)));
+       SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_ORDERED);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_FCHECK(src1, src1w);
+       FUNCTION_FCHECK(src2, src2w);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               /* Format: <s|d>cmp[.r].<condition> <src1>, <src2> */
+               fprintf(compiler->verbose, "  %scmp%s.%s ", (type & SLJIT_SINGLE_OP) ? "s" : "d",
+                       !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
+               sljit_verbose_fparam(src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+/* Checks and optionally traces an indirect jump.
+   When compiler->skip_checks is set (debug or verbose builds only) the flag
+   is cleared and nothing else happens. Otherwise type must lie in
+   SLJIT_JUMP..SLJIT_CALL3, debug builds run FUNCTION_CHECK_SRC on the target
+   operand, and verbose builds print an "ijump" line with that operand. */
+static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* One-shot flag: consume it so that the next call is checked again. */
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               return;
+       }
+#endif
+
+       SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_SRC(src, srcw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  ijump.%s ", jump_names[type]);
+               sljit_verbose_param(src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+}
+
+static SLJIT_INLINE void check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_UNUSED_ARG(type);
+
+       SLJIT_ASSERT(type >= SLJIT_C_EQUAL && type < SLJIT_JUMP);
+       SLJIT_ASSERT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_UI || GET_OPCODE(op) == SLJIT_MOV_SI
+               || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR));
+       SLJIT_ASSERT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0);
+       SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS));
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       if (GET_OPCODE(op) < SLJIT_ADD) {
+               SLJIT_ASSERT(src == SLJIT_UNUSED && srcw == 0);
+       } else {
+               SLJIT_ASSERT(src == dst && srcw == dstw);
+       }
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  %sflags.%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i",
+                       op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+               sljit_verbose_param(dst, dstw);
+               if (src != SLJIT_UNUSED) {
+                       fprintf(compiler->verbose, ", ");
+                       sljit_verbose_param(src, srcw);
+               }
+               fprintf(compiler->verbose, ", %s\n", jump_names[type]);
+       }
+#endif
+}
+
+/* Checks the destination operand of a "local_base" request (debug builds,
+   through FUNCTION_CHECK_DST) and, in verbose builds, writes a "local_base"
+   trace line with the operand and the offset to compiler->verbose when that
+   stream is set. The offset itself is not range-checked here. */
+static SLJIT_INLINE void check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(offset);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  local_base ");
+               sljit_verbose_param(dst, dstw);
+               fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset);
+       }
+#endif
+}
+
+/* Checks the destination operand of a "const" request (debug builds, through
+   FUNCTION_CHECK_DST) and, in verbose builds, writes a "const" trace line
+   with the operand and init_value to compiler->verbose when that stream is
+   set. */
+static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       /* If debug and verbose are disabled, all arguments are unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(init_value);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  const ");
+               sljit_verbose_param(dst, dstw);
+               fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value);
+       }
+#endif
+}
+
+/* Moves the return value into SLJIT_RETURN_REG ahead of a return.
+   Returns SLJIT_SUCCESS without emitting anything when op is SLJIT_UNUSED or
+   when the value already sits in SLJIT_RETURN_REG and op is a move that
+   needs no conversion; otherwise returns the result of sljit_emit_op1(). */
+static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       /* No return value requested: nothing to move. */
+       if (op == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (src == SLJIT_RETURN_REG) {
+               /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. */
+               if (op == SLJIT_MOV || op == SLJIT_MOV_P)
+                       return SLJIT_SUCCESS;
+#if !(defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+               /* Outside 64 bit builds the int-sized moves are skipped as well. */
+               if (op == SLJIT_MOV_UI || op == SLJIT_MOV_SI)
+                       return SLJIT_SUCCESS;
+#endif
+       }
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* Ask the op1 checker to stay silent for this internal move. */
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw);
+}
+
+/* CPU description section: builds the SLJIT_CPUINFO string literal from
+   three parts (word size, endianness, unaligned access support). */
+
+/* Part 1: word size. Exactly one of the two architecture macros must be set. */
+#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
+#define SLJIT_CPUINFO_PART1 " 32bit ("
+#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#define SLJIT_CPUINFO_PART1 " 64bit ("
+#else
+#error "Internal error: CPU type info missing"
+#endif
+
+/* Part 2: byte order. Note that a missing endianness macro is reported with
+   the same message as a missing word size above. */
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define SLJIT_CPUINFO_PART2 "little endian + "
+#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)
+#define SLJIT_CPUINFO_PART2 "big endian + "
+#else
+#error "Internal error: CPU type info missing"
+#endif
+
+/* Part 3: "unaligned" when SLJIT_UNALIGNED is set, "aligned" otherwise. */
+#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED)
+#define SLJIT_CPUINFO_PART3 "unaligned)"
+#else
+#define SLJIT_CPUINFO_PART3 "aligned)"
+#endif
+
+/* Adjacent string literals are concatenated, e.g. " 32bit (little endian + unaligned)". */
+#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3
+
+/* Textually include the architecture-specific code generator selected by the
+   active SLJIT_CONFIG_* macro. Several configurations share one file (both
+   x86 variants, ARM v5/v7, both PPC variants, both MIPS variants). If no
+   macro in the chain matches, nothing is included. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#      include "sljitNativeX86_common.c"
+#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#      include "sljitNativeX86_common.c"
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#      include "sljitNativeARM_32.c"
+#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#      include "sljitNativeARM_32.c"
+#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#      include "sljitNativeARM_T2_32.c"
+#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#      include "sljitNativeARM_64.c"
+#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#      include "sljitNativePPC_common.c"
+#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#      include "sljitNativePPC_common.c"
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      include "sljitNativeMIPS_common.c"
+#elif (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#      include "sljitNativeMIPS_common.c"
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#      include "sljitNativeSPARC_common.c"
+#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+#      include "sljitNativeTILEGX_64.c"
+#endif
+
+#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* Default compare for most architectures. */
+       sljit_si flags, tmp_src, condition;
+       sljit_sw tmp_srcw;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w);
+
+       condition = type & 0xff;
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+       if ((condition == SLJIT_C_EQUAL || condition == SLJIT_C_NOT_EQUAL)) {
+               if ((src1 & SLJIT_IMM) && !src1w) {
+                       src1 = src2;
+                       src1w = src2w;
+                       src2 = SLJIT_IMM;
+                       src2w = 0;
+               }
+               if ((src2 & SLJIT_IMM) && !src2w)
+                       return emit_cmp_to0(compiler, type, src1, src1w);
+       }
+#endif
+
+       if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) {
+               /* Immediate is prefered as second argument by most architectures. */
+               switch (condition) {
+               case SLJIT_C_LESS:
+                       condition = SLJIT_C_GREATER;
+                       break;
+               case SLJIT_C_GREATER_EQUAL:
+                       condition = SLJIT_C_LESS_EQUAL;
+                       break;
+               case SLJIT_C_GREATER:
+                       condition = SLJIT_C_LESS;
+                       break;
+               case SLJIT_C_LESS_EQUAL:
+                       condition = SLJIT_C_GREATER_EQUAL;
+                       break;
+               case SLJIT_C_SIG_LESS:
+                       condition = SLJIT_C_SIG_GREATER;
+                       break;
+               case SLJIT_C_SIG_GREATER_EQUAL:
+                       condition = SLJIT_C_SIG_LESS_EQUAL;
+                       break;
+               case SLJIT_C_SIG_GREATER:
+                       condition = SLJIT_C_SIG_LESS;
+                       break;
+               case SLJIT_C_SIG_LESS_EQUAL:
+                       condition = SLJIT_C_SIG_GREATER_EQUAL;
+                       break;
+               }
+               type = condition | (type & (SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP));
+               tmp_src = src1;
+               src1 = src2;
+               src2 = tmp_src;
+               tmp_srcw = src1w;
+               src1w = src2w;
+               src2w = tmp_srcw;
+       }
+
+       if (condition <= SLJIT_C_NOT_ZERO)
+               flags = SLJIT_SET_E;
+       else if (condition <= SLJIT_C_LESS_EQUAL)
+               flags = SLJIT_SET_U;
+       else
+               flags = SLJIT_SET_S;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->skip_checks = 1;
+#endif
+       PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_INT_OP),
+               SLJIT_UNUSED, 0, src1, src1w, src2, src2w));
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si flags, condition;
+
+       check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w);
+
+       condition = type & 0xff;
+       flags = (condition <= SLJIT_C_FLOAT_NOT_EQUAL) ? SLJIT_SET_E : SLJIT_SET_S;
+       if (type & SLJIT_SINGLE_OP)
+               flags |= SLJIT_SINGLE_OP;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->skip_checks = 1;
+#endif
+       sljit_emit_fop1(compiler, SLJIT_CMPD | flags, src1, src1w, src2, src2w);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+}
+
+#endif
+
+#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+/* Generic sljit_get_local_base, compiled for every configuration except x86.
+   Emits "dst = SLJIT_LOCALS_REG + offset" as an SLJIT_ADD with
+   SLJIT_KEEP_FLAGS, or as a plain SLJIT_MOV when the offset (after
+   ADJUST_LOCAL_OFFSET has been applied) is zero. Returns the result of the
+   emitting call. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{
+       CHECK_ERROR();
+       check_sljit_get_local_base(compiler, dst, dstw, offset);
+
+       ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* Exactly one instruction is emitted below, so one skip is enough. */
+       compiler->skip_checks = 1;
+#endif
+       if (offset == 0)
+               return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_LOCALS_REG, 0);
+
+       return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
+}
+
+#endif
+
+#else /* SLJIT_CONFIG_UNSUPPORTED */
+
+/* Stub implementations for machines that are not (yet) supported. */
+
+/* Unsupported-platform variant: always reports the name "unsupported". */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "unsupported";
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns NULL. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void)
+{
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP(); frees nothing. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_ASSERT_STOP();
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns NULL. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(size);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+/* Unsupported-platform stub, only present in verbose builds: trips
+   SLJIT_ASSERT_STOP() and ignores the stream. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(verbose);
+       SLJIT_ASSERT_STOP();
+}
+#endif
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns NULL. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP(); frees nothing. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
+{
+       SLJIT_UNUSED_ARG(code);
+       SLJIT_ASSERT_STOP();
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(scratches);
+       SLJIT_UNUSED_ARG(saveds);
+       SLJIT_UNUSED_ARG(local_size);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP(); records nothing. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(scratches);
+       SLJIT_UNUSED_ARG(saveds);
+       SLJIT_UNUSED_ARG(local_size);
+       SLJIT_ASSERT_STOP();
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and hands the
+   register number back unchanged. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       SLJIT_ASSERT_STOP();
+       return reg;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(instruction);
+       SLJIT_UNUSED_ARG(size);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns 0. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+       SLJIT_ASSERT_STOP();
+       return 0;
+}
+
+/* Unsupported-platform stub: trips SLJIT_ASSERT_STOP() and returns
+   SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{ /* Placeholder sljit_emit_fop2: no instruction is generated, every argument is only marked unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return SLJIT_ERR_UNSUPPORTED; /* Error code reserved for the SLJIT_CONFIG_UNSUPPORTED architecture. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{ /* Placeholder sljit_emit_label: no label object is created. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return NULL; /* Callers never receive a usable label from this variant. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{ /* Placeholder sljit_emit_jump: no jump object is created. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return NULL; /* Callers never receive a usable jump from this variant. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{ /* Placeholder sljit_emit_cmp: no comparison or jump is generated. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return NULL; /* Callers never receive a usable jump from this variant. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{ /* Placeholder sljit_emit_fcmp: no floating point comparison or jump is generated. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return NULL; /* Callers never receive a usable jump from this variant. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label)
+{ /* Placeholder sljit_set_label: the jump is left untouched. */
+       SLJIT_UNUSED_ARG(jump);
+       SLJIT_UNUSED_ARG(label);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target)
+{ /* Placeholder sljit_set_target: the jump is left untouched. */
+       SLJIT_UNUSED_ARG(jump);
+       SLJIT_UNUSED_ARG(target);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{ /* Placeholder sljit_emit_ijump: no indirect jump is generated. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return SLJIT_ERR_UNSUPPORTED; /* Error code reserved for the SLJIT_CONFIG_UNSUPPORTED architecture. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{ /* Placeholder sljit_emit_op_flags: no instruction is generated, every argument is only marked unused. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return SLJIT_ERR_UNSUPPORTED; /* Error code reserved for the SLJIT_CONFIG_UNSUPPORTED architecture. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{ /* Placeholder sljit_get_local_base: nothing is written to dst. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(offset);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return SLJIT_ERR_UNSUPPORTED; /* Error code reserved for the SLJIT_CONFIG_UNSUPPORTED architecture. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw initval)
+{ /* Placeholder sljit_emit_const: no constant object is created. */
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(initval);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+       return NULL; /* Callers never receive a usable constant from this variant. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{ /* Placeholder sljit_set_jump_addr: no memory at addr is patched. */
+       SLJIT_UNUSED_ARG(addr);
+       SLJIT_UNUSED_ARG(new_addr);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{ /* Placeholder sljit_set_const: no memory at addr is patched. */
+       SLJIT_UNUSED_ARG(addr);
+       SLJIT_UNUSED_ARG(new_constant);
+       SLJIT_ASSERT_STOP(); /* NOTE(review): presumably flags any call as a usage error in debug builds - confirm. */
+}
+
+#endif
diff --git a/ext/pcre/pcrelib/sljit/sljitLir.h b/ext/pcre/pcrelib/sljit/sljitLir.h
new file mode 100644 (file)
index 0000000..e2cd218
--- /dev/null
@@ -0,0 +1,1001 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_LIR_H_
+#define _SLJIT_LIR_H_
+
+/*
+   ------------------------------------------------------------------------
+    Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC)
+   ------------------------------------------------------------------------
+
+   Short description
+    Advantages:
+      - The execution can be continued from any LIR instruction. In other
+        words, it is possible to jump to any label from anywhere, even from
+        a code fragment which is compiled later, if both compiled code
+        fragments share the same context. See sljit_emit_enter for more details
+      - Supports self modifying code: target of (conditional) jump and call
+        instructions and some constant values can be dynamically modified
+        during runtime
+        - although it is not suggested to do it frequently
+        - can be used for inline caching: save an important value once
+          in the instruction stream
+        - since this feature limits the optimization possibilities, a
+          special flag must be passed at compile time when these
+          instructions are emitted
+      - A fixed stack space can be allocated for local variables
+      - The compiler is thread-safe
+      - The compiler is highly configurable through preprocessor macros.
+        You can disable unneeded features (multithreading in single
+        threaded applications), and you can use your own system functions
+        (including memory allocators). See sljitConfig.h
+    Disadvantages:
+      - No automatic register allocation, and temporary results are
+        not stored on the stack (hence the name).
+      - Limited number of registers (only 6+4 integer registers, max 3+2
+        scratch, max 3+2 saved and 6 floating point registers)
+    In practice:
+      - This approach is very effective for interpreters
+        - One of the saved registers typically points to a stack interface
+        - It can jump to any exception handler anytime (even if it belongs
+          to another function)
+        - Hot paths can be modified during runtime reflecting the changes
+          of the fastest execution path of the dynamic language
+        - SLJIT supports complex memory addressing modes
+        - mainly position and context independent code (except some cases)
+
+    For valgrind users:
+      - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code"
+*/
+
+#if !(defined SLJIT_NO_DEFAULT_CONFIG && SLJIT_NO_DEFAULT_CONFIG)
+#include "sljitConfig.h"
+#endif
+
+/* The following header file defines useful macros for fine tuning
+sljit based code generators. They are listed in the beginning
+of sljitConfigInternal.h */
+
+#include "sljitConfigInternal.h"
+
+/* --------------------------------------------------------------------- */
+/*  Error codes                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Indicates no error. */
+#define SLJIT_SUCCESS                  0
+/* After the call of sljit_generate_code(), the error code of the compiler
+   is set to this value to avoid future sljit calls (in debug mode at least).
+   The compiler should be freed after sljit_generate_code(). */
+#define SLJIT_ERR_COMPILED             1
+/* Cannot allocate non executable memory. */
+#define SLJIT_ERR_ALLOC_FAILED         2
+/* Cannot allocate executable memory.
+   Only for sljit_generate_code() */
+#define SLJIT_ERR_EX_ALLOC_FAILED      3
+/* Return value for the empty SLJIT_CONFIG_UNSUPPORTED architecture. */
+#define SLJIT_ERR_UNSUPPORTED          4
+
+/* --------------------------------------------------------------------- */
+/*  Registers                                                            */
+/* --------------------------------------------------------------------- */
+
+#define SLJIT_UNUSED           0
+
+/* Scratch (temporary) registers which may not preserve their values
+   across function calls. */
+#define SLJIT_SCRATCH_REG1     1
+#define SLJIT_SCRATCH_REG2     2
+#define SLJIT_SCRATCH_REG3     3
+/* Note: extra registers cannot be used for memory addressing. */
+/* Note: on x86-32, these registers are emulated (using stack
+   loads & stores). */
+#define SLJIT_TEMPORARY_EREG1  4
+#define SLJIT_TEMPORARY_EREG2  5
+
+/* Saved registers which preserve their values across function calls. */
+#define SLJIT_SAVED_REG1       6
+#define SLJIT_SAVED_REG2       7
+#define SLJIT_SAVED_REG3       8
+/* Note: extra registers cannot be used for memory addressing. */
+/* Note: on x86-32, these registers are emulated (using stack
+   loads & stores). */
+#define SLJIT_SAVED_EREG1      9
+#define SLJIT_SAVED_EREG2      10
+
+/* Read-only register (cannot be the destination of an operation).
+   Only SLJIT_MEM1(SLJIT_LOCALS_REG) addressing mode is allowed since
+   several ABIs have certain limitations about the stack layout. However
+   sljit_get_local_base() can be used to obtain the offset of a value
+   on the stack. */
+#define SLJIT_LOCALS_REG       11
+
+/* Number of registers. */
+#define SLJIT_NO_TMP_REGISTERS 5
+#define SLJIT_NO_GEN_REGISTERS 5
+#define SLJIT_NO_REGISTERS     11
+
+/* Return with machine word. */
+
+#define SLJIT_RETURN_REG       SLJIT_SCRATCH_REG1
+
+/* x86 prefers specific registers for special purposes. In case of shift
+   by register it supports only SLJIT_SCRATCH_REG3 for shift argument
+   (which is the src2 argument of sljit_emit_op2). If another register is
+   used, sljit must exchange data between registers which causes a minor
+   slowdown. Other architectures have no such limitation. */
+
+#define SLJIT_PREF_SHIFT_REG   SLJIT_SCRATCH_REG3
+
+/* --------------------------------------------------------------------- */
+/*  Floating point registers                                             */
+/* --------------------------------------------------------------------- */
+
+/* Note: SLJIT_UNUSED as destination is not valid for floating point
+     operations, since they cannot be used for setting flags. */
+
+/* Floating point operations are performed on double or
+   single precision values. */
+
+#define SLJIT_FLOAT_REG1               1
+#define SLJIT_FLOAT_REG2               2
+#define SLJIT_FLOAT_REG3               3
+#define SLJIT_FLOAT_REG4               4
+#define SLJIT_FLOAT_REG5               5
+#define SLJIT_FLOAT_REG6               6
+
+#define SLJIT_NO_FLOAT_REGISTERS       6
+
+/* --------------------------------------------------------------------- */
+/*  Main structures and functions                                        */
+/* --------------------------------------------------------------------- */
+
+struct sljit_memory_fragment {
+       struct sljit_memory_fragment *next; /* Link to another fragment (self-referential chain). */
+       sljit_uw used_size; /* NOTE(review): presumably the number of bytes of memory[] already handed out - confirm in sljitLir.c. */
+       /* Must be aligned to sljit_sw. */
+       sljit_ub memory[1]; /* NOTE(review): declared with one element; presumably over-allocated as a variable-length tail - confirm against the allocator. */
+};
+
+struct sljit_label {
+       struct sljit_label *next; /* Link to another label (self-referential chain). */
+       sljit_uw addr; /* NOTE(review): presumably the position of the label in the generated code - confirm. */
+       /* The maximum size difference. */
+       sljit_uw size;
+};
+
+struct sljit_jump {
+       struct sljit_jump *next; /* Link to another jump (self-referential chain). */
+       sljit_uw addr; /* NOTE(review): presumably the position of the jump in the generated code - confirm. */
+       sljit_sw flags; /* NOTE(review): presumably records, among other things, which member of u is valid - confirm. */
+       union { /* Destination of the jump: one of the two forms below. */
+               sljit_uw target; /* Raw address form (same type as the target argument of sljit_set_target). */
+               struct sljit_label* label; /* Label form (same type as the label argument of sljit_set_label). */
+       } u;
+};
+
+struct sljit_const {
+       struct sljit_const *next; /* Link to another constant (self-referential chain). */
+       sljit_uw addr; /* NOTE(review): presumably the position of the constant in the generated code - confirm. */
+};
+
+struct sljit_compiler {
+       sljit_si error; /* Current error code; read back by sljit_get_compiler_error(). */
+
+       struct sljit_label *labels; /* NOTE(review): labels / jumps / consts are presumably the heads of the chains linked through the next members - confirm. */
+       struct sljit_jump *jumps;
+       struct sljit_const *consts;
+       struct sljit_label *last_label; /* NOTE(review): the last_* members are presumably the tails of the same chains - confirm. */
+       struct sljit_jump *last_jump;
+       struct sljit_const *last_const;
+
+       struct sljit_memory_fragment *buf; /* NOTE(review): buf and abuf are two separate fragment chains; their exact roles are not visible in this header. */
+       struct sljit_memory_fragment *abuf;
+
+       /* Used scratch registers (NOTE(review): presumably the scratches value given to sljit_emit_enter / sljit_set_context - confirm). */
+       sljit_si scratches;
+       /* Used saved registers (NOTE(review): presumably the saveds value given to sljit_emit_enter / sljit_set_context - confirm). */
+       sljit_si saveds;
+       /* Local stack size. */
+       sljit_si local_size;
+       /* Code size. */
+       sljit_uw size;
+       /* For statistical purposes; returned by sljit_get_generated_code_size(). */
+       sljit_uw executable_size;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       sljit_si args;
+       sljit_si locals_offset;
+       sljit_si scratches_start;
+       sljit_si saveds_start;
+#endif
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si mode32;
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si flags_saved;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       /* Constant pool handling. */
+       sljit_uw *cpool;
+       sljit_ub *cpool_unique;
+       sljit_uw cpool_diff;
+       sljit_uw cpool_fill;
+       /* Other members. */
+       /* Contains pointer, "ldr pc, [...]" pairs. */
+       sljit_uw patches;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+       /* Temporary fields. */
+       sljit_uw shift_imm;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+       sljit_si locals_offset;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       sljit_sw imm;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       sljit_si delay_slot;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       sljit_si delay_slot;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       FILE* verbose; /* NOTE(review): presumably the stream set through sljit_compiler_verbose(), NULL when verbose output is disabled - confirm. */
+#endif
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* Local size passed to the functions. */
+       sljit_si logical_local_size;
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       sljit_si skip_checks;
+#endif
+};
+
+/* --------------------------------------------------------------------- */
+/*  Main functions                                                       */
+/* --------------------------------------------------------------------- */
+
+/* Creates an sljit compiler.
+   Returns NULL if failed. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void);
+
+/* Free everything except the compiled machine code. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler);
+
+/* Returns the current error code (the error member of the compiler). If an
+   error has occurred, future sljit calls which use the same compiler argument
+   return early with the same error code. Thus there is no need to check the
+   error after every call; it is enough to do it before the code is compiled.
+   Removing these checks increases the performance of the compiling process. */
+static SLJIT_INLINE sljit_si sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; }
+
+/*
+   Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit,
+   and <= 128 bytes on 64 bit architectures. The memory area is owned by the
+   compiler, and freed by sljit_free_compiler. The returned pointer is
+   sizeof(sljit_sw) aligned. Excellent for allocating small blocks during
+   the compiling, and no need to worry about freeing them. The size is
+   enough to contain at most 16 pointers. If the size is outside of the range,
+   the function will return with NULL. However, this return value does not
+   indicate that there is no more memory (does not set the current error code
+   of the compiler to out-of-memory status).
+*/
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+/* Passing NULL disables verbose. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose);
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code);
+
+/*
+   After the machine code generation is finished we can retrieve the allocated
+   executable memory size, although this area may not be fully filled with
+   instructions depending on some optimizations. This function is useful only
+   for statistical purposes; it just reads the executable_size member.
+
+   Before a successful code generation, this function returns 0.
+*/
+static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }
+
+/* Instruction generation. Returns with any error code. If there is no
+   error, they return with SLJIT_SUCCESS. */
+
+/*
+   The executable code is basically a function call from the viewpoint of
+   the C language. The function calls must obey the ABI (Application
+   Binary Interface) of the platform, which specifies the purpose of machine
+   registers and stack handling among other things. The sljit_emit_enter
+   function emits the necessary instructions for setting up a new context
+   for the executable code and moves function arguments to the saved
+   registers. The number of arguments is specified in the "args"
+   parameter and the first argument goes to SLJIT_SAVED_REG1, the second
+   goes to SLJIT_SAVED_REG2 and so on. The numbers of scratch and
+   saved registers are passed in the "scratches" and "saveds" arguments
+   respectively. Since the saved registers contain the arguments,
+   "args" must be less than or equal to "saveds". The sljit_emit_enter
+   function is also capable of allocating stack space for local variables. The
+   "local_size" argument contains the size in bytes of this local area
+   and its starting address is stored in SLJIT_LOCALS_REG. However
+   SLJIT_LOCALS_REG is not necessarily the machine stack pointer.
+   The memory bytes between SLJIT_LOCALS_REG (inclusive) and
+   SLJIT_LOCALS_REG + local_size (exclusive) can be modified freely
+   until the function returns. The stack space is uninitialized.
+
+   Note: every call of sljit_emit_enter and sljit_set_context
+         overwrites the previous context. */
+
+#define SLJIT_MAX_LOCAL_SIZE   65536
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size);
+
+/* The machine code has a context (which contains the local stack space size,
+   number of used registers, etc.) which is initialized by sljit_emit_enter. Several
+   functions (like sljit_emit_return) require this context to be able to generate
+   the appropriate code. However, some code fragments (like inline cache) may have
+   no normal entry point so their context is unknown to the compiler. Using the
+   function below we can specify their context.
+
+   Note: every call of sljit_emit_enter and sljit_set_context overwrites
+         the previous context. */
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size);
+
+/* Return from machine code.  The op argument can be SLJIT_UNUSED which means the
+   function does not return with anything or any opcode between SLJIT_MOV and
+   SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw they must be 0 if op
+   is SLJIT_UNUSED, otherwise see below the description about source and
+   destination arguments. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src, sljit_sw srcw);
+
+/* Fast calling mechanism for utility functions (see SLJIT_FAST_CALL). All registers and
+   even the stack frame are passed to the callee. The return address is preserved in
+   dst/dstw by sljit_emit_fast_enter (the type of the value stored by this function
+   is sljit_p), and sljit_emit_fast_return can use this as a return value later. */
+
+/* Note: only for sljit specific, non ABI compliant calls. Fast, since only a few machine
+   instructions are needed. Excellent for small utility functions, where saving registers
+   and setting up a new stack frame would cost too much performance. However, it is still
+   possible to return to the address of the caller (or anywhere else). */
+
+/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */
+
+/* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested,
+   since many architectures do clever branch prediction on call / return instruction pairs. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw);
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw);
+
+/*
+   Source and destination values for arithmetical instructions
+    imm              - a simple immediate value (cannot be used as a destination)
+    reg              - any of the registers (immediate argument must be 0)
+    [imm]            - absolute immediate memory address
+    [reg+imm]        - indirect memory address
+    [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3)
+                       useful for (byte, half, int, sljit_sw) array access
+                       (fully supported by both x86 and ARM architectures, and cheap operation on others)
+*/
+
+/*
+   IMPORTANT NOTE: memory access MUST be naturally aligned unless the
+                   SLJIT_UNALIGNED macro is defined and its value is 1.
+
+     length | alignment
+   ---------+-----------
+     byte   | 1 byte (any physical_address is accepted)
+     half   | 2 byte ((physical_address & 0x1) == 0)
+     int    | 4 byte ((physical_address & 0x3) == 0)
+     word   | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1
+            | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1
+    pointer | size of sljit_p type (4 byte on 32 bit machines, 4 or 8 byte
+            | on 64 bit machines)
+
+   Note:   Different architectures have different addressing limitations.
+           A single instruction is enough for the following addressing
+           modes. Other addressing modes are emulated by instruction
+           sequences. This information could help to improve those code
+           generators which focus on only a few architectures.
+
+   x86:    [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32)
+           [reg+(reg<<imm)] is supported
+           [imm], -2^32+1 <= imm <= 2^32-1 is supported
+           Write-back is not supported
+   arm:    [reg+imm], -4095 <= imm <= 4095 (or -255 <= imm <= 255 for signed
+                bytes, any halfs or floating point values)
+           [reg+(reg<<imm)] is supported
+           Write-back is supported
+   arm-t2: [reg+imm], -255 <= imm <= 4095
+           [reg+(reg<<imm)] is supported
+           Write back is supported only for [reg+imm], where -255 <= imm <= 255
+   ppc:    [reg+imm], -65536 <= imm <= 65535. 64 bit loads/stores and 32 bit
+                signed load on 64 bit requires immediates divisible by 4.
+                [reg+imm] is not supported for signed 8 bit values.
+           [reg+reg] is supported
+           Write-back is supported except for one instruction: 32 bit signed
+                load with [reg+imm] addressing mode on 64 bit.
+   mips:   [reg+imm], -65536 <= imm <= 65535
+   sparc:  [reg+imm], -4096 <= imm <= 4095
+           [reg+reg] is supported
+*/
+
+/* Register output: simply the name of the register.
+   For destination, you can use SLJIT_UNUSED as well. */
+#define SLJIT_MEM              0x80
+#define SLJIT_MEM0()           (SLJIT_MEM)
+#define SLJIT_MEM1(r1)         (SLJIT_MEM | (r1))
+#define SLJIT_MEM2(r1, r2)     (SLJIT_MEM | (r1) | ((r2) << 8))
+#define SLJIT_IMM              0x40
+
+/* Set 32 bit operation mode (I) on 64 bit CPUs. The flag is totally ignored on
+   32 bit CPUs. If this flag is set for an arithmetic operation, it uses only the
+   lower 32 bit of the input register(s), and sets the CPU status flags according
+   to the 32 bit result. The higher 32 bits are undefined for both the input and
+   output. However, the CPU might not ignore those higher 32 bits, like MIPS, which
+   expects it to be the sign extension of the lower 32 bit. All 32 bit operations
+   are undefined, if this condition is not fulfilled. Therefore, when SLJIT_INT_OP
+   is specified, all register arguments must be the result of other operations with
+   the same SLJIT_INT_OP flag. In other words, although a register can hold either
+   a 64 or 32 bit value, these values cannot be mixed. The only exceptions are
+   SLJIT_IMOV and SLJIT_IMOVU (SLJIT_MOV_SI/SLJIT_MOVU_SI with SLJIT_INT_OP flag)
+   which can convert any source argument to SLJIT_INT_OP compatible result. This
+   conversion might be unnecessary on some CPUs like x86-64, since the upper 32
+   bit is always ignored. In this case SLJIT is clever enough to not generate any
+   instructions if the source and destination operands are the same registers.
+   Affects sljit_emit_op0, sljit_emit_op1 and sljit_emit_op2. */
+#define SLJIT_INT_OP           0x100
+
+/* Single precision mode (SP). This flag is similar to SLJIT_INT_OP, just
+   it applies to floating point registers (it is even the same bit). When
+   this flag is passed, the CPU performs single precision floating point
+   operations. Similar to SLJIT_INT_OP, all register arguments must be the
+   result of other floating point operations with this flag. Affects
+   sljit_emit_fop1, sljit_emit_fop2 and sljit_emit_fcmp. */
+#define SLJIT_SINGLE_OP                0x100
+
+/* Common CPU status flags for all architectures (x86, ARM, PPC)
+    - carry flag
+    - overflow flag
+    - zero flag
+    - negative/positive flag (depends on the architecture)
+   On mips, these flags are emulated by software. */
+
+/* By default, the instructions may, or may not set the CPU status flags.
+   Forcing to set or keep status flags can be done with the following flags: */
+
+/* Note: sljit tries to emit the minimum number of instructions. Using these
+   flags can increase them, so use them wisely to avoid unnecessary code generation. */
+
+/* Set Equal (Zero) status flag (E). */
+#define SLJIT_SET_E                    0x0200
+/* Set unsigned status flag (U). */
+#define SLJIT_SET_U                    0x0400
+/* Set signed status flag (S). */
+#define SLJIT_SET_S                    0x0800
+/* Set signed overflow flag (O). */
+#define SLJIT_SET_O                    0x1000
+/* Set carry flag (C).
+   Note: Roughly an unsigned overflow, but it behaves differently on various CPUs. */
+#define SLJIT_SET_C                    0x2000
+/* Do not modify the flags (K).
+   Note: This flag cannot be combined with any other SLJIT_SET_* flag. */
+#define SLJIT_KEEP_FLAGS               0x4000
+
+/* Notes:
+     - you cannot postpone conditional jump instructions except if noted that
+       the instruction does not set flags (See: SLJIT_KEEP_FLAGS).
+     - flag combinations: '|' means 'logical or'. */
+
+/* Flags: - (never set any flags)
+   Note: breakpoint instruction is not supported by all architectures (namely ppc)
+         It falls back to SLJIT_NOP in those cases. */
+#define SLJIT_BREAKPOINT               0
+/* Flags: - (never set any flags)
+   Note: may or may not cause an extra cycle wait
+         it can even decrease the runtime in a few cases. */
+#define SLJIT_NOP                      1
+/* Flags: - (may destroy flags)
+   Unsigned multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2.
+   Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */
+#define SLJIT_UMUL                     2
+/* Flags: - (may destroy flags)
+   Signed multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2.
+   Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */
+#define SLJIT_SMUL                     3
+/* Flags: I - (may destroy flags)
+   Unsigned divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2.
+   The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2.
+   Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */
+#define SLJIT_UDIV                     4
+#define SLJIT_IUDIV                    (SLJIT_UDIV | SLJIT_INT_OP)
+/* Flags: I - (may destroy flags)
+   Signed divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2.
+   The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2.
+   Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */
+#define SLJIT_SDIV                     5
+#define SLJIT_ISDIV                    (SLJIT_SDIV | SLJIT_INT_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op);
+
+/* Notes for MOV instructions:
+   U = Mov with update (pre form). If source or destination defined as SLJIT_MEM1(r1)
+       or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument
+   UB = unsigned byte (8 bit)
+   SB = signed byte (8 bit)
+   UH = unsigned half (16 bit)
+   SH = signed half (16 bit)
+   UI = unsigned int (32 bit)
+   SI = signed int (32 bit)
+   P  = pointer (sljit_p) size */
+
+/* Flags: - (never set any flags) */
+#define SLJIT_MOV                      6
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_UB                   7
+#define SLJIT_IMOV_UB                  (SLJIT_MOV_UB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_SB                   8
+#define SLJIT_IMOV_SB                  (SLJIT_MOV_SB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_UH                   9
+#define SLJIT_IMOV_UH                  (SLJIT_MOV_UH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_SH                   10
+#define SLJIT_IMOV_SH                  (SLJIT_MOV_SH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOV_UI                   11
+/* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOV. */
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOV_SI                   12
+#define SLJIT_IMOV                     (SLJIT_MOV_SI | SLJIT_INT_OP)
+/* Flags: - (never set any flags) */
+#define SLJIT_MOV_P                    13
+/* Flags: - (never set any flags) */
+#define SLJIT_MOVU                     14
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_UB                  15
+#define SLJIT_IMOVU_UB                 (SLJIT_MOVU_UB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_SB                  16
+#define SLJIT_IMOVU_SB                 (SLJIT_MOVU_SB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_UH                  17
+#define SLJIT_IMOVU_UH                 (SLJIT_MOVU_UH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_SH                  18
+#define SLJIT_IMOVU_SH                 (SLJIT_MOVU_SH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOVU_UI                  19
+/* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOVU. */
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOVU_SI                  20
+#define SLJIT_IMOVU                    (SLJIT_MOVU_SI | SLJIT_INT_OP)
+/* Flags: - (never set any flags) */
+#define SLJIT_MOVU_P                   21
+/* Flags: I | E | K */
+#define SLJIT_NOT                      22
+#define SLJIT_INOT                     (SLJIT_NOT | SLJIT_INT_OP)
+/* Flags: I | E | O | K */
+#define SLJIT_NEG                      23
+#define SLJIT_INEG                     (SLJIT_NEG | SLJIT_INT_OP)
+/* Count leading zeroes
+   Flags: I | E | K
+   Important note! Sparc 32 does not support K flag, since
+   the required popc instruction is introduced only in sparc 64. */
+#define SLJIT_CLZ                      24
+#define SLJIT_ICLZ                     (SLJIT_CLZ | SLJIT_INT_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw);
+
+/* Flags: I | E | O | C | K */
+#define SLJIT_ADD                      25
+#define SLJIT_IADD                     (SLJIT_ADD | SLJIT_INT_OP)
+/* Flags: I | C | K */
+#define SLJIT_ADDC                     26
+#define SLJIT_IADDC                    (SLJIT_ADDC | SLJIT_INT_OP)
+/* Flags: I | E | U | S | O | C | K */
+#define SLJIT_SUB                      27
+#define SLJIT_ISUB                     (SLJIT_SUB | SLJIT_INT_OP)
+/* Flags: I | C | K */
+#define SLJIT_SUBC                     28
+#define SLJIT_ISUBC                    (SLJIT_SUBC | SLJIT_INT_OP)
+/* Note: integer mul
+   Flags: I | O (see SLJIT_C_MUL_*) | K */
+#define SLJIT_MUL                      29
+#define SLJIT_IMUL                     (SLJIT_MUL | SLJIT_INT_OP)
+/* Flags: I | E | K */
+#define SLJIT_AND                      30
+#define SLJIT_IAND                     (SLJIT_AND | SLJIT_INT_OP)
+/* Flags: I | E | K */
+#define SLJIT_OR                       31
+#define SLJIT_IOR                      (SLJIT_OR | SLJIT_INT_OP)
+/* Flags: I | E | K */
+#define SLJIT_XOR                      32
+#define SLJIT_IXOR                     (SLJIT_XOR | SLJIT_INT_OP)
+/* Flags: I | E | K
+   Let bit_length be the length of the shift operation: 32 or 64.
+   If src2 is immediate, src2w is masked by (bit_length - 1).
+   Otherwise, if the content of src2 is outside the range from 0
+   to bit_length - 1, the operation is undefined. */
+#define SLJIT_SHL                      33
+#define SLJIT_ISHL                     (SLJIT_SHL | SLJIT_INT_OP)
+/* Flags: I | E | K
+   Let bit_length be the length of the shift operation: 32 or 64.
+   If src2 is immediate, src2w is masked by (bit_length - 1).
+   Otherwise, if the content of src2 is outside the range from 0
+   to bit_length - 1, the operation is undefined. */
+#define SLJIT_LSHR                     34
+#define SLJIT_ILSHR                    (SLJIT_LSHR | SLJIT_INT_OP)
+/* Flags: I | E | K
+   Let bit_length be the length of the shift operation: 32 or 64.
+   If src2 is immediate, src2w is masked by (bit_length - 1).
+   Otherwise, if the content of src2 is outside the range from 0
+   to bit_length - 1, the operation is undefined. */
+#define SLJIT_ASHR                     35
+#define SLJIT_IASHR                    (SLJIT_ASHR | SLJIT_INT_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns with the real machine register index of any SLJIT_SCRATCH,
+   SLJIT_SAVED or SLJIT_LOCALS register.
+   Note: it returns with -1 for virtual registers (all EREGs on x86-32). */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns with the real machine register index of any SLJIT_FLOAT register.
+   Note: the index is divided by 2 on ARM 32 bit architectures. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
+
+/* Any instruction can be inserted into the instruction stream by
+   sljit_emit_op_custom. It has a similar purpose as inline assembly.
+   The size parameter must match the instruction size of the target
+   architecture:
+
+         x86: 0 < size <= 15. The instruction argument can be byte aligned.
+      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
+              if size == 4, the instruction argument must be 4 byte aligned.
+   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size);
+
+/* Returns with non-zero if fpu is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
+
+/* Note: dst is the left and src is the right operand for SLJIT_CMPD / SLJIT_CMPS.
+   Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED is set,
+         the comparison result is unpredictable.
+   Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
+#define SLJIT_CMPD                     36
+#define SLJIT_CMPS                     (SLJIT_CMPD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_MOVD                     37
+#define SLJIT_MOVS                     (SLJIT_MOVD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_NEGD                     38
+#define SLJIT_NEGS                     (SLJIT_NEGD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_ABSD                     39
+#define SLJIT_ABSS                     (SLJIT_ABSD | SLJIT_SINGLE_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw);
+
+/* Flags: SP - (never set any flags) */
+#define SLJIT_ADDD                     40
+#define SLJIT_ADDS                     (SLJIT_ADDD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_SUBD                     41
+#define SLJIT_SUBS                     (SLJIT_SUBD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_MULD                     42
+#define SLJIT_MULS                     (SLJIT_MULD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DIVD                     43
+#define SLJIT_DIVS                     (SLJIT_DIVD | SLJIT_SINGLE_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* Label and jump instructions. */
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler);
+
+/* Invert conditional instruction: xor (^) with 0x1 */
+#define SLJIT_C_EQUAL                  0
+#define SLJIT_C_ZERO                   0
+#define SLJIT_C_NOT_EQUAL              1
+#define SLJIT_C_NOT_ZERO               1
+
+#define SLJIT_C_LESS                   2
+#define SLJIT_C_GREATER_EQUAL          3
+#define SLJIT_C_GREATER                        4
+#define SLJIT_C_LESS_EQUAL             5
+#define SLJIT_C_SIG_LESS               6
+#define SLJIT_C_SIG_GREATER_EQUAL      7
+#define SLJIT_C_SIG_GREATER            8
+#define SLJIT_C_SIG_LESS_EQUAL         9
+
+#define SLJIT_C_OVERFLOW               10
+#define SLJIT_C_NOT_OVERFLOW           11
+
+#define SLJIT_C_MUL_OVERFLOW           12
+#define SLJIT_C_MUL_NOT_OVERFLOW       13
+
+#define SLJIT_C_FLOAT_EQUAL            14
+#define SLJIT_C_FLOAT_NOT_EQUAL                15
+#define SLJIT_C_FLOAT_LESS             16
+#define SLJIT_C_FLOAT_GREATER_EQUAL    17
+#define SLJIT_C_FLOAT_GREATER          18
+#define SLJIT_C_FLOAT_LESS_EQUAL       19
+#define SLJIT_C_FLOAT_UNORDERED                20
+#define SLJIT_C_FLOAT_ORDERED          21
+
+#define SLJIT_JUMP                     22
+#define SLJIT_FAST_CALL                        23
+#define SLJIT_CALL0                    24
+#define SLJIT_CALL1                    25
+#define SLJIT_CALL2                    26
+#define SLJIT_CALL3                    27
+
+/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */
+
+/* The target can be changed during runtime (see: sljit_set_jump_addr). */
+#define SLJIT_REWRITABLE_JUMP          0x1000
+
+/* Emit a jump instruction. The destination is not set, only the type of the jump.
+    type must be between SLJIT_C_EQUAL and SLJIT_CALL3
+    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+   Flags: - (never set any flags) for both conditional and unconditional jumps.
+   Flags: destroy all flags for calls. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type);
+
+/* Basic arithmetic comparison. In most architectures it is implemented as
+   an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting
+   appropriate flags) followed by a sljit_emit_jump. However some
+   architectures (e.g. MIPS) may employ special optimizations here. It is
+   suggested to use this comparison form when appropriate.
+    type must be between SLJIT_C_EQUAL and SLJIT_C_SIG_LESS_EQUAL
+    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP or SLJIT_INT_OP
+   Flags: destroy flags. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* Basic floating point comparison. In most architectures it is implemented as
+   an SLJIT_CMPD / SLJIT_CMPS operation (setting appropriate flags) followed
+   by a sljit_emit_jump. However some architectures (e.g. MIPS) may employ
+   special optimizations here. It is suggested to use this comparison form
+   when appropriate.
+    type must be between SLJIT_C_FLOAT_EQUAL and SLJIT_C_FLOAT_ORDERED
+    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and SLJIT_SINGLE_OP
+   Flags: destroy flags.
+   Note: if either operand is NaN, the behaviour is undefined for
+         type <= SLJIT_C_FLOAT_LESS_EQUAL. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* Set the destination of the jump to this label. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label);
+/* Set the destination of the jump to this target address. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
+
+/* Call function or jump anywhere. Both direct and indirect form
+    type must be between SLJIT_JUMP and SLJIT_CALL3
+    Direct form: set src to SLJIT_IMM() and srcw to the address
+    Indirect form: any other valid addressing mode
+   Flags: - (never set any flags) for unconditional jumps.
+   Flags: destroy all flags for calls. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw);
+
+/* Perform the operation using the conditional flags as the second argument.
+   Type must always be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_ORDERED. The
+   value represented by the type is 1, if the condition represented by the type
+   is fulfilled, and 0 otherwise.
+
+   If op == SLJIT_MOV, SLJIT_MOV_SI, SLJIT_MOV_UI:
+     Set dst to the value represented by the type (0 or 1).
+     Src must be SLJIT_UNUSED, and srcw must be 0
+     Flags: - (never set any flags)
+   If op == SLJIT_OR, op == SLJIT_AND, op == SLJIT_XOR
+     Performs the binary operation using src as the first, and the value
+     represented by type as the second argument.
+     Important note: only dst=src and dstw=srcw is supported at the moment!
+     Flags: I | E | K
+   Note: sljit_emit_op_flags does nothing, if dst is SLJIT_UNUSED (regardless of op). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type);
+
+/* Copies the base address of SLJIT_LOCALS_REG+offset to dst.
+   Flags: - (never set any flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset);
+
+/* The constant can be changed at runtime (see: sljit_set_const)
+   Flags: - (never set any flags) */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value);
+
+/* After code generation the addresses of label, jump and const instructions
+   are computed. Since these structures are freed by sljit_free_compiler, the
+   addresses must be preserved by the user program elsewhere. */
+static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label)
+{
+       /* Returns the addr field of the label. */
+       return label->addr;
+}
+static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump)
+{
+       /* Returns the addr field of the jump. */
+       return jump->addr;
+}
+static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_)
+{
+       /* Returns the addr field of the constant. */
+       return const_->addr;
+}
+
+/* Only the address is required to rewrite the code. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant);
+
+/* --------------------------------------------------------------------- */
+/*  Miscellaneous utility functions                                      */
+/* --------------------------------------------------------------------- */
+
+#define SLJIT_MAJOR_VERSION    0
+#define SLJIT_MINOR_VERSION    91
+
+/* Get the human readable name of the platform. Can be useful on platforms
+   like ARM, where ARM and Thumb2 functions can be mixed, and
+   it is useful to know the type of the code generator. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void);
+
+/* Portable helper function to get an offset of a member. */
+#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10)
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+/* This global lock is useful to compile common functions. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void);
+#endif
+
+#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
+
+/* The sljit_stack is a utility feature of sljit, which allocates a
+   writable memory region between base (inclusive) and limit (exclusive).
+   Both base and limit are pointers, and base is always <= limit.
+   This feature uses the "address space reserve" feature
+   of modern operating systems. Basically we don't need to allocate a
+   huge memory block in one step for the worst case, we can start with
+   a smaller chunk and extend it later. Since the address space is
+   reserved, the data is never copied to other regions, thus it is safe
+   to store pointers here. */
+
+/* Note: The base field is aligned to PAGE_SIZE bytes (usually 4k or more).
+   Note: stack growing should not happen in small steps: 4k, 16k or even
+     bigger growth is better.
+   Note: this structure may not be supported by all operating systems.
+     Some kind of fallback mechanism is suggested when SLJIT_UTIL_STACK
+     is not defined. */
+
+struct sljit_stack {
+       /* User data, anything can be stored here.
+          Initially it holds the same value as base. */
+       sljit_uw top;
+       /* These members are read only for the user: base is the inclusive
+          start and limit is the exclusive end of the writable region;
+          sljit_stack_resize fails if the new limit is above max_limit. */
+       sljit_uw base;
+       sljit_uw limit;
+       sljit_uw max_limit;
+};
+
+/* Returns NULL if unsuccessful.
+   Note: limit and max_limit contain the sizes for stack allocation
+   Note: the top field is initialized to base. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack);
+
+/* Can be used to increase (allocate) or decrease (free) the memory area.
+   Returns with a non-zero value if unsuccessful. If new_limit is greater than
+   max_limit, it will fail. It is very easy to implement a stack data structure,
+   since the growth ratio can be added to the current limit, and sljit_stack_resize
+   will do all the necessary checks. The fields of the stack are not changed if
+   sljit_stack_resize fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit);
+
+#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */
+
+#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+
+/* Get the entry address of a given function. */
+#define SLJIT_FUNC_OFFSET(func_name)   ((sljit_sw)func_name)
+
+#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+
+/* All JIT related code should be placed in the same context (library, binary, etc.). */
+
+#define SLJIT_FUNC_OFFSET(func_name)   (*(sljit_sw*)(void*)func_name)
+
+/* For powerpc64, the function pointers point to a context descriptor. */
+struct sljit_function_context {
+       sljit_sw addr;
+       sljit_sw r2;
+       sljit_sw r11;
+};
+
+/* Fill the context arguments using the addr and the function.
+   If func_ptr is NULL, it will not be set to the address of context
+   If addr is NULL, the function address also comes from the func pointer. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func);
+
+#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+
+#endif /* _SLJIT_LIR_H_ */
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c
new file mode 100644 (file)
index 0000000..6747c4f
--- /dev/null
@@ -0,0 +1,2524 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       /* The name is selected at compile time: ARMv7 takes precedence over
+          ARMv5, and the build is rejected when neither is configured. */
+#if !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#error "Internal error: Unknown ARM architecture"
+#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+       return "ARMv7" SLJIT_CPUINFO;
+#else
+       return "ARMv5" SLJIT_CPUINFO;
+#endif
+}
+
+/* Temporary register indexes. They start at the last register + 1
+   (SLJIT_NO_REGISTERS + 1) and are used as indexes into reg_map. */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
+#define TMP_PC         (SLJIT_NO_REGISTERS + 4)
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_FLOAT_REG6 + 1)
+
+/* Constant pool alignment, in ARM instruction words.
+   Cache lines are usually 32 byte aligned.
+   CONST_POOL_EMPTY is the cpool_diff value used while no literal is pending.
+   ALIGN_INSTRUCTION rounds a pointer up to the next multiple of
+   CONST_POOL_ALIGNMENT * sizeof(sljit_uw) bytes.
+   MAX_DIFFERENCE converts a byte distance to instruction words and
+   subtracts CONST_POOL_ALIGNMENT - 1 words from it. */
+#define CONST_POOL_ALIGNMENT   8
+#define CONST_POOL_EMPTY       0xffffffff
+
+#define ALIGN_INSTRUCTION(ptr) \
+       (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
+#define MAX_DIFFERENCE(max_diff) \
+       (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
+
+/* Lookup table indexed by sljit register index (including the TMP_*
+   indexes); RM, RD and RN place the looked up value at bit 0, bit 12 and
+   bit 16 of an instruction word, respectively.
+   See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
+       0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
+};
+
+#define RM(rm) (reg_map[rm])
+#define RD(rd) (reg_map[rd] << 12)
+#define RN(rn) (reg_map[rn] << 16)
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* The instruction constants below include the AL condition.
+   Subtracting CONDITIONAL (INST_NAME - CONDITIONAL) removes it. */
+#define COND_MASK      0xf0000000
+#define CONDITIONAL    0xe0000000
+#define PUSH_POOL      0xff000000
+
+/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
+#define ADC_DP         0x5
+#define ADD_DP         0x4
+#define AND_DP         0x0
+#define B              0xea000000
+#define BIC_DP         0xe
+#define BL             0xeb000000
+#define BLX            0xe12fff30
+#define BX             0xe12fff10
+#define CLZ            0xe16f0f10
+#define CMP_DP         0xa
+#define BKPT           0xe1200070
+#define EOR_DP         0x1
+#define MOV_DP         0xd
+#define MUL            0xe0000090
+#define MVN_DP         0xf
+#define NOP            0xe1a00000
+#define ORR_DP         0xc
+#define PUSH           0xe92d0000
+#define POP            0xe8bd0000
+#define RSB_DP         0x3
+#define RSC_DP         0x7
+#define SBC_DP         0x6
+#define SMULL          0xe0c00090
+#define SUB_DP         0x2
+#define UMULL          0xe0800090
+#define VABS_F32       0xeeb00ac0
+#define VADD_F32       0xee300a00
+#define VCMP_F32       0xeeb40a40
+#define VDIV_F32       0xee800a00
+#define VMOV_F32       0xeeb00a40
+#define VMRS           0xeef1fa10
+#define VMUL_F32       0xee200a00
+#define VNEG_F32       0xeeb10a40
+#define VSTR_F32       0xed000a00
+#define VSUB_F32       0xee300a40
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+/* Arm v7 specific instructions. */
+#define MOVW           0xe3000000
+#define MOVT           0xe3400000
+#define SXTB           0xe6af0070
+#define SXTH           0xe6bf0070
+#define UXTB           0xe6ef0070
+#define UXTH           0xe6ff0070
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+
+static sljit_si push_cpool(struct sljit_compiler *compiler)
+{
+       /* Flushes the pending constant pool into the instruction stream as:
+          one marker word, CONST_POOL_ALIGNMENT - 1 zero filler words and
+          then the cpool_fill literals. Returns SLJIT_SUCCESS, or leaves
+          through FAIL_IF when a buffer word cannot be obtained. */
+       sljit_uw *word;
+       sljit_uw *literal;
+       sljit_uw *literal_end;
+       sljit_si filler;
+
+       /* A label recorded at the current position is moved past all the
+          words emitted below, so it refers to the address after the pool. */
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
+
+       SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
+
+       /* Marker word: 0xff in the top byte, number of literals below it. */
+       word = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!word);
+       *word = 0xff000000 | compiler->cpool_fill;
+       compiler->size++;
+
+       /* Zero filler words reserved for alignment. */
+       for (filler = CONST_POOL_ALIGNMENT - 1; filler > 0; filler--) {
+               word = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+               FAIL_IF(!word);
+               *word = 0;
+               compiler->size++;
+       }
+
+       /* The literals themselves, in pool order. */
+       literal_end = compiler->cpool + compiler->cpool_fill;
+       for (literal = compiler->cpool; literal < literal_end; literal++) {
+               word = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+               FAIL_IF(!word);
+               *word = *literal;
+               compiler->size++;
+       }
+
+       /* The pool is empty again. */
+       compiler->cpool_fill = 0;
+       compiler->cpool_diff = CONST_POOL_EMPTY;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+{
+       /* Appends one instruction word to the stream. If literals are pending
+          and the oldest one would get too far away, the constant pool is
+          flushed before the instruction is stored. */
+       sljit_uw *slot;
+
+       if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY
+                       && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) {
+               FAIL_IF(push_cpool(compiler));
+       }
+
+       slot = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!slot);
+       *slot = inst;
+       compiler->size++;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+{
+       /* Appends an instruction that refers to a literal of the pending
+          constant pool. The index of the literal is stored in the low 12 bits
+          of the instruction (which must be zero on entry). An equal literal
+          that is already in the pool and is not marked unique is shared. */
+       sljit_uw *slot;
+       sljit_uw pool_index = CPOOL_SIZE; /* CPOOL_SIZE: no entry selected yet. */
+       sljit_uw i;
+
+       if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) {
+               /* The oldest pending literal would get too far: flush first. */
+               FAIL_IF(push_cpool(compiler));
+       }
+       else {
+               /* Look for a shareable copy of the literal. */
+               for (i = 0; i < compiler->cpool_fill; i++) {
+                       if (compiler->cpool[i] == literal && !compiler->cpool_unique[i]) {
+                               pool_index = i;
+                               break;
+                       }
+               }
+       }
+
+       if (pool_index == CPOOL_SIZE) {
+               /* Must allocate a new entry in the literal pool. */
+               if (compiler->cpool_fill >= CPOOL_SIZE) {
+                       /* The pool is full: flush it and start a new one. */
+                       FAIL_IF(push_cpool(compiler));
+                       pool_index = 0;
+                       compiler->cpool_fill = 1;
+               }
+               else
+                       pool_index = compiler->cpool_fill++;
+       }
+
+       SLJIT_ASSERT((inst & 0xfff) == 0);
+       slot = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!slot);
+       *slot = inst | pool_index;
+       compiler->size++;
+
+       compiler->cpool_unique[pool_index] = 0;
+       compiler->cpool[pool_index] = literal;
+       /* Remember the position of the first instruction with a pending literal. */
+       if (compiler->cpool_diff == CONST_POOL_EMPTY)
+               compiler->cpool_diff = compiler->size;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+{
+       /* Same as push_inst_with_literal, but the literal always gets its own
+          pool entry, which is marked unique so it is never shared. */
+       sljit_uw *slot;
+
+       /* Flush when the pool is full or its oldest literal would get too far. */
+       if (SLJIT_UNLIKELY(compiler->cpool_fill >= CPOOL_SIZE
+                       || (compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))) {
+               FAIL_IF(push_cpool(compiler));
+       }
+
+       SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
+       slot = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!slot);
+       *slot = inst | compiler->cpool_fill;
+       compiler->size++;
+
+       compiler->cpool[compiler->cpool_fill] = literal;
+       compiler->cpool_unique[compiler->cpool_fill++] = 1;
+       /* Remember the position of the first instruction with a pending literal. */
+       if (compiler->cpool_diff == CONST_POOL_EMPTY)
+               compiler->cpool_diff = compiler->size;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si prepare_blx(struct sljit_compiler *compiler)
+{
+       /* Makes sure there is place for at least two instructions before the
+          constant pool must be flushed (it doesn't matter whether the first
+          has a literal); the pool is flushed now if there is not. */
+       if (compiler->cpool_diff == CONST_POOL_EMPTY)
+               return SLJIT_SUCCESS;
+       if (SLJIT_UNLIKELY(compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
+               return push_cpool(compiler);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_blx(struct sljit_compiler *compiler)
+{
+       /* Emits BLX through TMP_REG1. It must tightly follow the previous
+          instruction (to be able to convert it to a bl instruction), so no
+          constant pool flush may be due at this point. */
+       sljit_uw blx_inst = BLX | RM(TMP_REG1);
+
+       SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
+       return push_inst(compiler, blx_inst);
+}
+
+static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
+{
+       /* Patches the offset field of every pc relative load found in
+          [last_pc_patch, code_ptr) so that it addresses its literal in the
+          pool starting at const_pool. Literals are renumbered in the order
+          of their first use: the pool words are overwritten with the new
+          index of each referenced literal, while unreferenced ones are left
+          as (sljit_uw)(-1). Returns the number of referenced literals. */
+       sljit_uw *inst_ptr;
+       sljit_uw *pool_entry;
+       sljit_uw distance;
+       sljit_uw slot;
+       sljit_uw used = 0;
+
+       SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
+
+       /* Set unused flag for all literals in the constant pool.
+          E.g.: unused literals can belong to branches, which can be encoded as B or BL.
+          We can "compress" the constant pool by discarding these literals. */
+       for (pool_entry = const_pool; pool_entry < const_pool + cpool_size; pool_entry++)
+               *pool_entry = (sljit_uw)(-1);
+
+       for (inst_ptr = last_pc_patch; inst_ptr < code_ptr; inst_ptr++) {
+               /* Only data transfer instructions with Rn == r15 are patched. */
+               if ((*inst_ptr & 0x0c0f0000) != 0x040f0000)
+                       continue;
+
+               distance = const_pool - inst_ptr;
+               slot = (*inst_ptr) & 0xfff;
+
+               /* Must be a load instruction with immediate offset. */
+               SLJIT_ASSERT(slot < cpool_size && !(*inst_ptr & (1 << 25)) && (*inst_ptr & (1 << 20)));
+               if ((sljit_si)const_pool[slot] >= 0) {
+                       /* Already renumbered by an earlier load. */
+                       slot = const_pool[slot];
+               }
+               else {
+                       /* First use: assign the next free index. */
+                       const_pool[slot] = used;
+                       slot = used++;
+               }
+
+               SLJIT_ASSERT(distance >= 1);
+               if (distance < 2 && slot == 0) {
+                       /* The literal is the word right after this load: clear
+                          bit 23 and set the offset field to 4. */
+                       *inst_ptr = (*inst_ptr & ~(0xfff | (1 << 23))) | 0x004;
+                       continue;
+               }
+
+               /* Word distance minus 2, converted to a byte offset. */
+               distance = (distance + slot - 2) << 2;
+               SLJIT_ASSERT(distance <= 0xfff);
+               *inst_ptr = (*inst_ptr & ~0xfff) | distance;
+       }
+       return used;
+}
+
+/* In some rare occasions we may need future patches. The probability is close to 0 in practice.
+   A future patch keeps the original content of a constant pool word that
+   resolve_const_pool_index had to overwrite before that word was processed. */
+struct future_patch {
+       struct future_patch* next; /* Next pending patch (singly linked list). */
+       sljit_si index; /* Constant pool index the saved word belongs to. */
+       sljit_si value; /* Saved content of that constant pool word. */
+};
+
+/* Moves the literal *buf_ptr, whose original pool index is
+   cpool_current_index, to the slot chosen by patch_pc_relative_loads.
+   The slot number is read from cpool_start_address[cpool_current_index],
+   unless that word was already overwritten by an earlier literal; in that
+   case the number is taken (and removed) from the *first_patch list.
+   A negative slot number means the literal is unused and it is dropped.
+   When the destination word has not been processed yet, its content is
+   saved in a new future_patch first.
+   Returns SLJIT_SUCCESS, or SLJIT_ERR_ALLOC_FAILED when a future_patch
+   cannot be allocated (the whole list is freed in that case). */
+static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
+{
+       struct future_patch **link;
+       struct future_patch *node;
+       sljit_si slot;
+
+       /* Look for a saved value which belongs to the current index. */
+       for (link = first_patch; *link; link = &(*link)->next) {
+               if ((sljit_uw)(*link)->index == cpool_current_index)
+                       break;
+       }
+
+       node = *link;
+       if (node) {
+               /* Use the saved value and unlink the patch. */
+               slot = node->value;
+               *link = node->next;
+               SLJIT_FREE(node);
+       }
+       else {
+               /* Using the values generated by patch_pc_relative_loads. */
+               slot = (sljit_si)cpool_start_address[cpool_current_index];
+       }
+
+       /* Unused literal. */
+       if (slot < 0)
+               return SLJIT_SUCCESS;
+
+       if ((sljit_uw)slot > cpool_current_index) {
+               /* The destination is processed later: keep its content. */
+               node = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch));
+               if (!node) {
+                       for (node = *first_patch; node; node = *first_patch) {
+                               *first_patch = node->next;
+                               SLJIT_FREE(node);
+                       }
+                       return SLJIT_ERR_ALLOC_FAILED;
+               }
+               node->next = *first_patch;
+               node->index = slot;
+               node->value = cpool_start_address[slot];
+               *first_patch = node;
+       }
+
+       cpool_start_address[slot] = *buf_ptr;
+       return SLJIT_SUCCESS;
+}
+
+#else
+
+/* Appends the instruction word inst to the compiler buffer and increases
+   compiler->size by one. FAIL_IF leaves the function early when ensure_buf
+   cannot provide space; otherwise SLJIT_SUCCESS is returned. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+{
+       sljit_uw *slot = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+
+       FAIL_IF(!slot);
+       *slot = inst;
+       compiler->size++;
+       return SLJIT_SUCCESS;
+}
+
+/* Loads imm into reg with a MOVW / MOVT pair: MOVW receives bits 0-15 of
+   imm, MOVT receives bits 16-31. Each 16 bit half is split into a 4 bit
+   field stored at bits 16-19 and a 12 bit field stored at bits 0-11 of the
+   instruction word. Returns the result of the last push_inst call. */
+static SLJIT_INLINE sljit_si emit_imm(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       sljit_uw low_half = MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff);
+       sljit_uw high_half = MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff);
+
+       FAIL_IF(push_inst(compiler, low_half));
+       return push_inst(compiler, high_half);
+}
+
+#endif
+
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
+{      /* Tries to replace the jump sequence which ends at code_ptr with a
+          direct B / BL instruction. On success PATCH_B is set in jump->flags;
+          only the opcode and condition are written here, the offset bits are
+          left clear.
+          Returns 1 when the sequence became shorter (sljit_generate_code then
+          moves code_ptr back: by one word on ARM v5, by two words otherwise),
+          and 0 when the number of words is unchanged. */
+       sljit_sw diff;
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP) /* Rewritable jumps keep their long form. */
+               return 0;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       if (jump->flags & IS_BL)
+               code_ptr--; /* The BL would be stored one word earlier. */
+
+       if (jump->flags & JUMP_ADDR)
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)); /* Byte distance from two words after the branch. */
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
+       }
+
+       /* Branch to Thumb code has not been optimized yet. */
+       if (diff & 0x3)
+               return 0;
+
+       if (jump->flags & IS_BL) {
+               if (diff <= 0x01ffffff && diff >= -0x02000000) {
+                       *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); /* Condition is copied from the following word. */
+                       jump->flags |= PATCH_B;
+                       return 1;
+               }
+       }
+       else {
+               if (diff <= 0x01ffffff && diff >= -0x02000000) {
+                       *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); /* Replaced in place. */
+                       jump->flags |= PATCH_B; /* Falls through to "return 0": the word count is unchanged. */
+               }
+       }
+#else
+       if (jump->flags & JUMP_ADDR)
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr);
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
+       }
+
+       /* Branch to Thumb code has not been optimized yet. */
+       if (diff & 0x3)
+               return 0;
+
+       if (diff <= 0x01ffffff && diff >= -0x02000000) {
+               code_ptr -= 2; /* The branch replaces the word two positions earlier. */
+               *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
+               jump->flags |= PATCH_B;
+               return 1;
+       }
+#endif
+       return 0;
+}
+
+static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_si flush)
+{      /* Redirects the jump identified by addr to new_addr. When flush is
+          non-zero, SLJIT_CACHE_FLUSH is called for the instruction words which
+          were modified.
+          ARM v5 build: addr points to a two word record, the address of the
+          jump instruction followed by a copy of its original (literal load)
+          form. The jump becomes a direct B / BL when new_addr is in range,
+          otherwise the load is restored and the literal is updated.
+          Other builds: addr is the address of a MOVW / MOVT pair whose
+          immediate fields are replaced. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       sljit_uw *ptr = (sljit_uw*)addr;
+       sljit_uw *inst = (sljit_uw*)ptr[0];
+       sljit_uw mov_pc = ptr[1];
+       sljit_si bl = (mov_pc & 0x0000f000) != RD(TMP_PC); /* Load does not target TMP_PC: handled as a call (BL). */
+       sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2); /* Offset in words, relative to inst + 2. */
+
+       if (diff <= 0x7fffff && diff >= -0x800000) {
+               /* Turn to branch. */
+               if (!bl) {
+                       inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
+                       if (flush) {
+                               SLJIT_CACHE_FLUSH(inst, inst + 1);
+                       }
+               } else {
+                       inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
+                       inst[1] = NOP; /* The second word of the call sequence is not needed. */
+                       if (flush) {
+                               SLJIT_CACHE_FLUSH(inst, inst + 2);
+                       }
+               }
+       } else {
+               /* Get the position of the constant.
+                  Bit 23 set: the literal is two words after the instruction
+                  plus the 12 bit byte offset. Otherwise it is the word
+                  directly after the instruction. */
+               if (mov_pc & (1 << 23))
+                       ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
+               else
+                       ptr = inst + 1;
+
+               if (*inst != mov_pc) { /* Currently a direct branch: restore the original sequence. */
+                       inst[0] = mov_pc;
+                       if (!bl) {
+                               if (flush) {
+                                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+                               }
+                       } else {
+                               inst[1] = BLX | RM(TMP_REG1);
+                               if (flush) {
+                                       SLJIT_CACHE_FLUSH(inst, inst + 2);
+                               }
+                       }
+               }
+               *ptr = new_addr;
+       }
+#else
+       sljit_uw *inst = (sljit_uw*)addr;
+       SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
+       inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); /* Keeps the destination register, stores bits 0-15. */
+       inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); /* Stores bits 16-31. */
+       if (flush) {
+               SLJIT_CACHE_FLUSH(inst, inst + 2);
+       }
+#endif
+}
+
+static sljit_uw get_imm(sljit_uw imm); /* Forward declaration; a zero result is treated below as "not encodable". */
+
+static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_si flush)
+{      /* Replaces the constant identified by addr with new_constant. When
+          flush is non-zero, SLJIT_CACHE_FLUSH is called for the instruction
+          words which were modified.
+          ARM v5 build: addr points to a two word record, the address of the
+          instruction followed by a copy of its original literal load. The
+          instruction becomes a single MOV or MVN when get_imm can encode the
+          constant (or its complement); otherwise the load is restored and
+          the literal is updated.
+          Other builds: addr is the address of a MOVW / MOVT pair whose
+          immediate fields are replaced. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       sljit_uw *ptr = (sljit_uw*)addr;
+       sljit_uw *inst = (sljit_uw*)ptr[0];
+       sljit_uw ldr_literal = ptr[1];
+       sljit_uw src2;
+
+       src2 = get_imm(new_constant);
+       if (src2) {
+               *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; /* MOV rd, #imm; rd is taken from the load. */
+               if (flush) {
+                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+               }
+               return;
+       }
+
+       src2 = get_imm(~new_constant);
+       if (src2) {
+               *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; /* MVN rd, #imm with the complemented constant. */
+               if (flush) {
+                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+               }
+               return;
+       }
+
+       if (ldr_literal & (1 << 23)) /* Locate the literal, same rule as in inline_set_jump_addr. */
+               ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
+       else
+               ptr = inst + 1;
+
+       if (*inst != ldr_literal) { /* Currently a MOV / MVN: restore the load. */
+               *inst = ldr_literal;
+               if (flush) {
+                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+               }
+       }
+       *ptr = new_constant;
+#else
+       sljit_uw *inst = (sljit_uw*)addr;
+       SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
+       inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); /* Keeps the destination register, stores bits 0-15. */
+       inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); /* Stores bits 16-31. */
+       if (flush) {
+               SLJIT_CACHE_FLUSH(inst, inst + 2);
+       }
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{      /* Copies the instruction words buffered in compiler->buf into a newly
+          allocated executable block, lays out the constant pools (ARM v5 build
+          only), shortens jumps where detect_jump_type allows it, and finally
+          stores the jump targets and constants.
+          On success compiler->error is set to SLJIT_ERR_COMPILED,
+          compiler->executable_size to the size of the code in bytes, and the
+          start of the block is returned. NULL is returned when a future_patch
+          cannot be allocated (compiler->error is SLJIT_ERR_ALLOC_FAILED then);
+          PTR_FAIL_WITH_EXEC_IF presumably does the same when the executable
+          allocation itself fails. */
+       struct sljit_memory_fragment *buf;
+       sljit_uw *code;
+       sljit_uw *code_ptr;
+       sljit_uw *buf_ptr;
+       sljit_uw *buf_end;
+       sljit_uw size;
+       sljit_uw word_count;
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       sljit_uw cpool_size;
+       sljit_uw cpool_skip_alignment;
+       sljit_uw cpool_current_index;
+       sljit_uw *cpool_start_address;
+       sljit_uw *last_pc_patch;
+       struct future_patch *first_patch;
+#endif
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       /* Second code generation pass.
+          On ARM v5 two extra words are reserved for each patch (they hold the
+          records written for rewritable jumps and constants at the end of this
+          function), plus room for the pending constant pool and its alignment. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       size = compiler->size + (compiler->patches << 1);
+       if (compiler->cpool_fill > 0)
+               size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
+#else
+       size = compiler->size;
+#endif
+       code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       cpool_size = 0;
+       cpool_skip_alignment = 0;
+       cpool_current_index = 0;
+       cpool_start_address = NULL;
+       first_patch = NULL;
+       last_pc_patch = code;
+#endif
+
+       code_ptr = code;
+       word_count = 0;
+
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+
+       if (label && label->size == 0) { /* Label placed before the first instruction. */
+               label->addr = (sljit_uw)code;
+               label->size = 0;
+               label = label->next;
+       }
+
+       do {
+               buf_ptr = (sljit_uw*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       word_count++; /* 1-based index of the buffered word being processed. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       if (cpool_size > 0) { /* Inside a constant pool stored in the buffer. */
+                               if (cpool_skip_alignment > 0) { /* Skips CONST_POOL_ALIGNMENT - 1 buffered words first (presumably padding). */
+                                       buf_ptr++;
+                                       cpool_skip_alignment--;
+                               }
+                               else {
+                                       if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
+                                               SLJIT_FREE_EXEC(code);
+                                               compiler->error = SLJIT_ERR_ALLOC_FAILED;
+                                               return NULL;
+                                       }
+                                       buf_ptr++;
+                                       if (++cpool_current_index >= cpool_size) { /* Last literal of this pool. */
+                                               SLJIT_ASSERT(!first_patch);
+                                               cpool_size = 0;
+                                               if (label && label->size == word_count) {
+                                                       /* Points after the current instruction. */
+                                                       label->addr = (sljit_uw)code_ptr;
+                                                       label->size = code_ptr - code;
+                                                       label = label->next;
+                                               }
+                                       }
+                               }
+                       }
+                       else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { /* Ordinary instruction word. */
+#endif
+                               *code_ptr = *buf_ptr++;
+                               /* These structures are ordered by their address. */
+                               SLJIT_ASSERT(!label || label->size >= word_count);
+                               SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                               SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                               if (jump && jump->addr == word_count) { /* From here on jump->addr is a code address, not a word index. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                                       if (detect_jump_type(jump, code_ptr, code))
+                                               code_ptr--;
+                                       jump->addr = (sljit_uw)code_ptr;
+#else
+                                       jump->addr = (sljit_uw)(code_ptr - 2);
+                                       if (detect_jump_type(jump, code_ptr, code))
+                                               code_ptr -= 2;
+#endif
+                                       jump = jump->next;
+                               }
+                               if (label && label->size == word_count) {
+                                       /* code_ptr can be affected above. */
+                                       label->addr = (sljit_uw)(code_ptr + 1);
+                                       label->size = (code_ptr + 1) - code;
+                                       label = label->next;
+                               }
+                               if (const_ && const_->addr == word_count) { /* const_->addr becomes a code address as well. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                                       const_->addr = (sljit_uw)code_ptr;
+#else
+                                       const_->addr = (sljit_uw)(code_ptr - 1);
+#endif
+                                       const_ = const_->next;
+                               }
+                               code_ptr++;
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       }
+                       else {
+                               /* Fortunately, no need to shift.
+                                  PUSH_POOL marker word: its remaining bits hold the
+                                  number of literals in the pool which follows. */
+                               cpool_size = *buf_ptr++ & ~PUSH_POOL;
+                               SLJIT_ASSERT(cpool_size > 0);
+                               cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
+                               cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
+                               if (cpool_current_index > 0) {
+                                       /* Unconditional branch (over the literals which are kept). */
+                                       *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
+                                       code_ptr = cpool_start_address + cpool_current_index;
+                               }
+                               cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
+                               cpool_current_index = 0;
+                               last_pc_patch = code_ptr; /* Loads before this point are already patched. */
+                       }
+#endif
+               } while (buf_ptr < buf_end);
+               buf = buf->next;
+       } while (buf);
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       SLJIT_ASSERT(cpool_size == 0);
+       if (compiler->cpool_fill > 0) { /* Emit the still pending pool (compiler->cpool) after the code. */
+               cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
+               cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
+               if (cpool_current_index > 0)
+                       code_ptr = cpool_start_address + cpool_current_index;
+
+               buf_ptr = compiler->cpool;
+               buf_end = buf_ptr + compiler->cpool_fill;
+               cpool_current_index = 0;
+               while (buf_ptr < buf_end) {
+                       if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
+                               SLJIT_FREE_EXEC(code);
+                               compiler->error = SLJIT_ERR_ALLOC_FAILED;
+                               return NULL;
+                       }
+                       buf_ptr++;
+                       cpool_current_index++;
+               }
+               SLJIT_ASSERT(!first_patch);
+       }
+#endif
+
+       jump = compiler->jumps; /* All label addresses are known now: store the jump targets. */
+       while (jump) {
+               buf_ptr = (sljit_uw*)jump->addr;
+
+               if (jump->flags & PATCH_B) { /* Shortened by detect_jump_type: fill in the 24 bit word offset. */
+                       if (!(jump->flags & JUMP_ADDR)) {
+                               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+                               SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
+                               *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+                       }
+                       else {
+                               SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
+                               *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+                       }
+               }
+               else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       jump->addr = (sljit_uw)code_ptr; /* jump->addr now refers to the two word record appended here. */
+                       code_ptr[0] = (sljit_uw)buf_ptr;
+                       code_ptr[1] = *buf_ptr;
+                       inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+                       code_ptr += 2;
+#else
+                       inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+#endif
+               }
+               else {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       if (jump->flags & IS_BL)
+                               buf_ptr--;
+                       if (*buf_ptr & (1 << 23)) /* Locate the literal of the load and store the target there. */
+                               buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
+                       else
+                               buf_ptr += 1;
+                       *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+#else
+                       inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+#endif
+               }
+               jump = jump->next;
+       }
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       const_ = compiler->consts; /* Append a two word record for every constant as well. */
+       while (const_) {
+               buf_ptr = (sljit_uw*)const_->addr;
+               const_->addr = (sljit_uw)code_ptr;
+
+               code_ptr[0] = (sljit_uw)buf_ptr;
+               code_ptr[1] = *buf_ptr;
+               if (*buf_ptr & (1 << 23))
+                       buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
+               else
+                       buf_ptr += 1;
+               /* Set the value again (can be a simple constant). */
+               inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0);
+               code_ptr += 2;
+
+               const_ = const_->next;
+       }
+#endif
+
+       SLJIT_ASSERT(code_ptr - code <= (sljit_si)size);
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
+       SLJIT_CACHE_FLUSH(code, code_ptr); /* One flush for the whole block; the helpers above were called with flush == 0. */
+       return code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* emit_op inp_flags.
+   WRITE_BACK must be the first, since it is a flag. */
+#define WRITE_BACK     0x01
+#define ALLOW_IMM      0x02
+#define ALLOW_INV_IMM  0x04
+#define ALLOW_ANY_IMM  (ALLOW_IMM | ALLOW_INV_IMM)
+#define ARG_TEST       0x08
+
+/* Creates an index in data_transfer_insts array.
+   The table is indexed with these bits shifted right by 4
+   (see EMIT_DATA_TRANSFER). */
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x10
+#define HALF_DATA      0x20
+#define SIGNED_DATA    0x40
+#define LOAD_DATA      0x80
+
+#define EMIT_INSTRUCTION(inst) \
+       FAIL_IF(push_inst(compiler, (inst)))
+
+/* Condition: AL.
+   Builds a data processing instruction word from the opcode, the set_flags
+   bits, the RD(dst) and RN(src1) register fields and an already encoded
+   second operand (src2). */
+#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
+       (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w); /* Forward declaration; used by sljit_emit_enter and sljit_emit_return. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si size;
+       sljit_uw push;
+
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* Push saved registers, temporary registers
+          stmdb sp!, {..., lr} */
+       push = PUSH | (1 << 14);
+       if (scratches >= 5)
+               push |= 1 << 11;
+       if (scratches >= 4)
+               push |= 1 << 10;
+       if (saveds >= 5)
+               push |= 1 << 8;
+       if (saveds >= 4)
+               push |= 1 << 7;
+       if (saveds >= 3)
+               push |= 1 << 6;
+       if (saveds >= 2)
+               push |= 1 << 5;
+       if (saveds >= 1)
+               push |= 1 << 4;
+       EMIT_INSTRUCTION(push);
+
+       /* Stack must be aligned to 8 bytes: */
+       size = (1 + saveds) * sizeof(sljit_uw);
+       if (scratches >= 4)
+               size += (scratches - 3) * sizeof(sljit_uw);
+       local_size += size;
+       local_size = (local_size + 7) & ~7;
+       local_size -= size;
+       compiler->local_size = local_size;
+       if (local_size > 0)
+               FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size));
+
+       if (args >= 1)
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1)));
+       if (args >= 2)
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
+       if (args >= 3)
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3)));
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si size;
+
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       size = (1 + saveds) * sizeof(sljit_uw);
+       if (scratches >= 4)
+               size += (scratches - 3) * sizeof(sljit_uw);
+       local_size += size;
+       local_size = (local_size + 7) & ~7;
+       local_size -= size;
+       compiler->local_size = local_size;
+}
+
+/* Emits the function epilogue: moves the return value into place
+   (emit_mov_before_return), releases the local area and pops the
+   registers pushed by sljit_emit_enter, loading the return address
+   straight into pc. Returns the result of the final push_inst call; the
+   FAIL_IF based macros leave the function early on failure. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_uw reg_list;
+       sljit_si i;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       if (compiler->local_size > 0) {
+               FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size));
+       }
+
+       /* Pop saved registers, temporary registers
+          ldmia sp!, {..., pc}
+          The register list mirrors the one built in sljit_emit_enter,
+          with bit 15 (pc) in place of bit 14 (lr). */
+       reg_list = POP | (1 << 15);
+       for (i = 4; i <= 5; i++) {
+               if (compiler->scratches >= i)
+                       reg_list |= 1 << (6 + i);
+       }
+       for (i = 1; i <= 5; i++) {
+               if (compiler->saveds >= i)
+                       reg_list |= 1 << (3 + i);
+       }
+
+       return push_inst(compiler, reg_list);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* s/l - store/load (1 bit)
+   u/s - unsigned/signed (1 bit)
+   w/b/h/N - word/byte/half/NOT allowed (2 bit)
+   It contains 16 items, but not all are different.
+   The index is the transfer type shifted right by 4 (see EMIT_DATA_TRANSFER):
+   bits 0-1 select the size, bit 2 is SIGNED_DATA and bit 3 is LOAD_DATA. */
+
+static sljit_sw data_transfer_insts[16] = {
+/* s u w */ 0xe5000000 /* str */,
+/* s u b */ 0xe5400000 /* strb */,
+/* s u h */ 0xe10000b0 /* strh */,
+/* s u N */ 0x00000000 /* not allowed */,
+/* s s w */ 0xe5000000 /* str */,
+/* s s b */ 0xe5400000 /* strb */,
+/* s s h */ 0xe10000b0 /* strh */,
+/* s s N */ 0x00000000 /* not allowed */,
+
+/* l u w */ 0xe5100000 /* ldr */,
+/* l u b */ 0xe5500000 /* ldrb */,
+/* l u h */ 0xe11000b0 /* ldrh */,
+/* l u N */ 0x00000000 /* not allowed */,
+/* l s w */ 0xe5100000 /* ldr */,
+/* l s b */ 0xe11000d0 /* ldrsb */,
+/* l s h */ 0xe11000f0 /* ldrsh */,
+/* l s N */ 0x00000000 /* not allowed */,
+};
+
+#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
+       (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
+/* Normal ldr/str instruction.
+   Type2: ldrsb, ldrh, ldrsh (and strh, whose table entry has the same form).
+   IS_TYPE1_TRANSFER tests bit 26 of the table entry, which is set only in
+   the str / strb / ldr / ldrb encodings.
+   TYPE2_TRANSFER_IMM places the low nibble of an 8 bit offset at bits 0-3,
+   the high nibble at bits 8-11, and sets bit 22. */
+#define IS_TYPE1_TRANSFER(type) \
+       (data_transfer_insts[(type) >> 4] & 0x04000000)
+#define TYPE2_TRANSFER_IMM(imm) \
+       (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
+
+/* flags: */
+  /* Arguments are swapped. */
+#define ARGS_SWAPPED   0x01
+  /* Inverted immediate. */
+#define INV_IMM                0x02
+  /* Source and destination is register. */
+#define REG_DEST       0x04
+#define REG_SOURCE     0x08
+  /* One instruction is enough. */
+#define FAST_DEST      0x10
+  /* Multiple instructions are required. */
+#define SLOW_DEST      0x20
+/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
+#define SET_FLAGS      (1 << 20)
+/* dst: reg
+   src1: reg
+   src2: reg or imm (if allowed)
+   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization).
+   The *_AND_RETURN macros below expect compiler, flags, dst, src1 and src2
+   to be in scope and return from the enclosing function. */
+#define SRC2_IMM       (1 << 25)
+
+#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
+       return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)))
+
+#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
+       return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))
+
+#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
+       SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
+       if (compiler->shift_imm != 0x20) { \
+               SLJIT_ASSERT(src1 == TMP_REG1); \
+               SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
+               if (compiler->shift_imm != 0) \
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \
+       } \
+       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1]))); /* shift_imm == 0x20: the shift amount is taken from a register. */
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_si src2)
+{
+       sljit_sw mul_inst;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+               if (dst != src2) {
+                       if (src2 & SRC2_IMM) {
+                               if (flags & INV_IMM)
+                                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+                       }
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+               if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       if (op == SLJIT_MOV_UB)
+                               return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]));
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
+#else
+                       return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
+#endif
+               }
+               else if (dst != src2) {
+                       SLJIT_ASSERT(src2 & SRC2_IMM);
+                       if (flags & INV_IMM)
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+               if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]));
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
+#else
+                       return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
+#endif
+               }
+               else if (dst != src2) {
+                       SLJIT_ASSERT(src2 & SRC2_IMM);
+                       if (flags & INV_IMM)
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               if (src2 & SRC2_IMM) {
+                       if (flags & INV_IMM)
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+               }
+               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               SLJIT_ASSERT(!(src2 & SRC2_IMM));
+               FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
+               if (flags & SET_FLAGS)
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
+
+       case SLJIT_ADDC:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);
+
+       case SLJIT_SUB:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               if (!(flags & ARGS_SWAPPED))
+                       EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
+               EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);
+
+       case SLJIT_SUBC:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               if (!(flags & ARGS_SWAPPED))
+                       EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
+               EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);
+
+       case SLJIT_MUL:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               SLJIT_ASSERT(!(src2 & SRC2_IMM));
+               if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
+                       mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
+               else
+                       mul_inst = MUL | (reg_map[dst] << 16);
+
+               if (dst != src2)
+                       FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
+               else if (dst != src1)
+                       FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
+               else {
+                       /* Rm and Rd must not be the same register. */
+                       SLJIT_ASSERT(dst != TMP_REG1);
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
+                       FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
+               }
+
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+
+               /* We need to use TMP_REG3. */
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+               /* cmp TMP_REG2, dst asr #31. */
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0));
+
+       case SLJIT_AND:
+               if (!(flags & INV_IMM))
+                       EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP);
+               EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP);
+
+       case SLJIT_OR:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP);
+
+       case SLJIT_XOR:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP);
+
+       case SLJIT_SHL:
+               EMIT_SHIFT_INS_AND_RETURN(0);
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT_INS_AND_RETURN(1);
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT_INS_AND_RETURN(2);
+       }
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+#undef EMIT_DATA_PROCESS_INS_AND_RETURN /* The emit-and-return helpers are only used by the opcode switch above. */
+#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN
+#undef EMIT_SHIFT_INS_AND_RETURN
+
+/* Encodes imm as a data processing immediate operand: an 8 bit
+   value in the low byte plus a rotation count stored from bit 8.
+   Returns with SRC2_IMM | encoding, or with 0 if imm does not
+   fit into this form. */
+static sljit_uw get_imm(sljit_uw imm)
+{
+       sljit_si rot;
+
+       /* Small values need no rotation at all. */
+       if (imm <= 0xff)
+               return SRC2_IMM | imm;
+
+       /* Move the significant bits up into the top byte,
+          accumulating the rotation count on the way. */
+       if (imm & 0xff000000) {
+               imm = (imm << 24) | (imm >> 8);
+               rot = 0;
+       }
+       else {
+               imm <<= 8;
+               rot = 8;
+       }
+
+       if ((imm & 0xff000000) == 0) {
+               imm <<= 8;
+               rot += 4;
+       }
+
+       if ((imm & 0xf0000000) == 0) {
+               imm <<= 4;
+               rot += 2;
+       }
+
+       if ((imm & 0xc0000000) == 0) {
+               imm <<= 2;
+               rot += 1;
+       }
+
+       /* Anything left below the top byte cannot be encoded. */
+       if (imm & 0x00ffffff)
+               return 0;
+
+       return SRC2_IMM | (imm >> 24) | (rot << 8);
+}
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm, sljit_si positive)
+{
+       sljit_uw mask;
+       sljit_uw imm1; /* Encoded operand of the first (MOV / MVN) instruction. */
+       sljit_uw imm2; /* Encoded operand of the second (ORR / BIC) instruction. */
+       sljit_si rol;
+
+       /* Tries to load imm into reg with two data processing instructions:
+          MOV + ORR when positive is non-zero, MVN + BIC otherwise.
+          Returns with 0 if imm cannot be split into two encodable
+          operands, and with 1 after both instructions were emitted.
+          Step1: Search a zero byte (8 continuous zero bit). */
+       mask = 0xff000000;
+       rol = 8;
+       while(1) {
+               if (!(imm & mask)) {
+                       /* Rol imm by rol. */
+                       imm = (imm << rol) | (imm >> (32 - rol));
+                       /* Calculate arm rol. */
+                       rol = 4 + (rol >> 1);
+                       break;
+               }
+               rol += 2;
+               mask >>= 2; /* Slide the 8 bit window down by two bits. */
+               if (mask & 0x3) { /* The window has reached the lowest byte. */
+                       /* rol by 8. */
+                       imm = (imm << 8) | (imm >> 24);
+                       mask = 0xff00;
+                       rol = 24;
+                       while (1) {
+                               if (!(imm & mask)) {
+                                       /* Rol imm by rol. */
+                                       imm = (imm << rol) | (imm >> (32 - rol));
+                                       /* Calculate arm rol. */
+                                       rol = (rol >> 1) - 8;
+                                       break;
+                               }
+                               rol += 2;
+                               mask >>= 2;
+                               if (mask & 0x3)
+                                       return 0; /* No zero byte was found in the second scan either. */
+                       }
+                       break;
+               }
+       }
+
+       /* The low 8 bit must be zero. */
+       SLJIT_ASSERT(!(imm & 0xff));
+
+       if (!(imm & 0xff000000)) { /* Top byte is zero as well: the two middle bytes become the operands. */
+               imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
+               imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
+       }
+       else if (imm & 0xc0000000) { /* The top byte can be taken as the first operand without shifting. */
+               imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
+               imm <<= 8;
+               rol += 4;
+
+               if (!(imm & 0xff000000)) {
+                       imm <<= 8;
+                       rol += 4;
+               }
+
+               if (!(imm & 0xf0000000)) {
+                       imm <<= 4;
+                       rol += 2;
+               }
+
+               if (!(imm & 0xc0000000)) {
+                       imm <<= 2;
+                       rol += 1;
+               }
+
+               if (!(imm & 0x00ffffff)) /* The remaining bits must fit into one byte. */
+                       imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
+               else
+                       return 0;
+       }
+       else { /* Shift the first operand up to the top bits before it is taken. */
+               if (!(imm & 0xf0000000)) {
+                       imm <<= 4;
+                       rol += 2;
+               }
+
+               if (!(imm & 0xc0000000)) {
+                       imm <<= 2;
+                       rol += 1;
+               }
+
+               imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
+               imm <<= 8;
+               rol += 4;
+
+               if (!(imm & 0xf0000000)) {
+                       imm <<= 4;
+                       rol += 2;
+               }
+
+               if (!(imm & 0xc0000000)) {
+                       imm <<= 2;
+                       rol += 1;
+               }
+
+               if (!(imm & 0x00ffffff)) /* The remaining bits must fit into one byte. */
+                       imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
+               else
+                       return 0;
+       }
+
+       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1));
+       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2));
+       return 1; /* Both instructions were emitted. */
+}
+#endif
+
+/* Loads the constant imm into reg, trying the shortest
+   instruction sequences first. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm)
+{
+       sljit_uw encoded;
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+       /* Constants with no bits above the low 16 are loaded by a single MOVW. */
+       if ((imm & ~0xffff) == 0)
+               return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
+#endif
+
+       /* One instruction: MOV of an encodable immediate. */
+       encoded = get_imm(imm);
+       if (encoded != 0) {
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, encoded));
+               return SLJIT_SUCCESS;
+       }
+
+       /* One instruction: MVN of the encodable complement. */
+       encoded = get_imm(~imm);
+       if (encoded != 0) {
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, encoded));
+               return SLJIT_SUCCESS;
+       }
+
+#if !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       return emit_imm(compiler, reg, imm);
+#else
+       /* Two instructions. generate_int returns non-zero once it has
+          emitted code, which makes FAIL_IF leave this function.
+          NOTE(review): this relies on FAIL_IF returning compiler->error,
+          presumably SLJIT_SUCCESS after a successful emit - confirm. */
+       FAIL_IF(generate_int(compiler, reg, imm, 1));
+       FAIL_IF(generate_int(compiler, reg, ~imm, 0));
+
+       /* Otherwise the value is loaded as a literal through TMP_PC. */
+       return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
+#endif
+}
+
+/* Helper function. Sets dst to reg + value using at most 1 instruction:
+   ADD for non-negative values, SUB of the magnitude otherwise. The
+   instruction is emitted without SET_FLAGS.
+   Returns with the result of push_inst if the delta can be encoded as
+   an immediate operand, and with SLJIT_ERR_UNSUPPORTED (nothing is
+   emitted) if a single instruction is not enough. */
+static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
+{
+       sljit_uw imm;
+
+       if (value >= 0) {
+               imm = get_imm((sljit_uw)value);
+               if (imm)
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, imm));
+       }
+       else {
+               /* Negate in the unsigned domain: -value is a signed overflow
+                  (undefined behaviour) for the most negative sljit_sw. */
+               imm = get_imm((sljit_uw)0 - (sljit_uw)value);
+               if (imm)
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, imm));
+       }
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Can perform an operation using at most 1 instruction.
+   Handles immediate loads (arg has SLJIT_IMM) and memory accesses.
+   Returns with 0 if a single instruction is not enough (nothing is
+   emitted), with 1 if it is enough and ARG_TEST is set in inp_flags
+   (nothing is emitted either), and with -1 after the instruction was
+   emitted. Callers in this file check compiler->error after a
+   non-zero result (presumably because EMIT_INSTRUCTION returns early
+   on failure - confirm against its definition). */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_uw imm;
+
+       if (arg & SLJIT_IMM) {
+               imm = get_imm(argw); /* Try a plain MOV first. */
+               if (imm) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm));
+                       return -1;
+               }
+               imm = get_imm(~argw); /* Then MVN with the complement. */
+               if (imm) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm));
+                       return -1;
+               }
+               return 0;
+       }
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* Fast loads/stores. */
+       if (!(arg & REG_MASK)) /* No base register: left to getput_arg. */
+               return 0;
+
+       if (arg & OFFS_REG_MASK) { /* Base register + offset register. */
+               if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags)) /* A shifted offset register is only emitted for type1 transfers here. */
+                       return 0;
+
+               if (inp_flags & ARG_TEST)
+                       return 1;
+               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
+                       RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)));
+               return -1;
+       }
+
+       if (IS_TYPE1_TRANSFER(inp_flags)) { /* Offsets from -0xfff to 0xfff are accepted. */
+               if (argw >= 0 && argw <= 0xfff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw));
+                       return -1;
+               }
+               if (argw < 0 && argw >= -0xfff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
+                       return -1;
+               }
+       }
+       else { /* Offsets from -0xff to 0xff are accepted, wrapped in TYPE2_TRANSFER_IMM. */
+               if (argw >= 0 && argw <= 0xff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+                       return -1;
+               }
+               if (argw < 0 && argw >= -0xff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       argw = -argw;
+                       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/* Tells getput_arg (below) whether keeping the address of (arg, argw)
+   around would help the following (next_arg, next_argw) access.
+   Note: only binary operators call can_cache, and those always use
+   word arguments without write back.
+   Returns with 1 if caching is worthwhile, with 0 otherwise. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       /* Non-zero when the two offsets are at most 0xfff apart, in either direction. */
+       sljit_si within_range = ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff)
+               || ((sljit_uw)next_argw - (sljit_uw)argw <= 0xfff);
+
+       /* Immediates are never cached (that would be an operation on
+          constant arguments), and an offset register access is always
+          a simple operation. */
+       if (arg & (SLJIT_IMM | OFFS_REG_MASK))
+               return 0;
+
+       /* Immediate (base register free) access. */
+       if ((arg & REG_MASK) == 0)
+               return (next_arg & SLJIT_MEM) && within_range;
+
+       /* Offsets of this size are not cached. */
+       if (argw >= -0xfffff && argw <= 0xfffff)
+               return 0;
+
+       /* The same offset is needed again by the next memory access. */
+       if ((next_arg & SLJIT_MEM) && argw == next_argw)
+               return 1;
+
+       /* The same base is used again with a nearby offset. */
+       return arg == next_arg && within_range;
+}
+
+#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \
+       if (max_delta & 0xf00) /* 0xfff range: imm is passed as is. */ \
+               FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \
+       else /* 0xff range: imm is wrapped in TYPE2_TRANSFER_IMM. */ \
+               FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm))));
+/* Both helper macros expand inside getput_arg and use its locals
+   (compiler, inp_flags, max_delta, tmp_r, reg, arg).
+   TEST_WRITE_BACK: for a write back access whose base register is
+   also reg, the value of reg is copied to TMP_REG3 first and reg is
+   redirected to TMP_REG3. */
+#define TEST_WRITE_BACK() \
+       if (inp_flags & WRITE_BACK) { \
+               tmp_r = arg & REG_MASK; \
+               if (reg == tmp_r) { \
+                       /* This can only happen for stores */ \
+                       /* since ldr reg, [reg, ...]! has no meaning */ \
+                       SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \
+                       reg = TMP_REG3; \
+               } \
+       }
+
+/* Emit the necessary instructions. See can_cache above.
+   Slow path counterpart of getput_arg_fast: several instructions may
+   be emitted, and TMP_REG3 together with compiler->cache_arg /
+   compiler->cache_argw is used to keep an address or offset around
+   for the following access described by next_arg / next_argw.
+   Immediates (SLJIT_IMM) are forwarded to load_immediate.
+   Returns with SLJIT_SUCCESS or with the error of a failed emit. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_r;
+       sljit_sw max_delta;
+       sljit_sw sign;
+       sljit_uw imm;
+
+       if (arg & SLJIT_IMM) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               return load_immediate(compiler, reg, argw);
+       }
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3; /* Loads may build the address in the target register itself. */
+       max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff; /* Largest immediate offset used for this transfer type. */
+
+       if ((arg & REG_MASK) == SLJIT_UNUSED) { /* No base register: argw is the address. */
+               /* Write back is not used. */
+               imm = (sljit_uw)(argw - compiler->cache_argw);
+               if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { /* A cached constant in TMP_REG3 is close enough. */
+                       if (imm <= (sljit_uw)max_delta) {
+                               sign = 1;
+                               argw = argw - compiler->cache_argw;
+                       }
+                       else {
+                               sign = 0;
+                               argw = compiler->cache_argw - argw;
+                       }
+
+                       GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw);
+                       return SLJIT_SUCCESS;
+               }
+
+               /* With write back, we can create some sophisticated loads, but
+                  it is hard to decide whether we should convert downward (0s) or upward (1s). */
+               imm = (sljit_uw)(argw - next_argw);
+               if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { /* The next access is nearby: keep the address in TMP_REG3. */
+                       SLJIT_ASSERT(inp_flags & LOAD_DATA);
+
+                       compiler->cache_arg = SLJIT_IMM;
+                       compiler->cache_argw = argw;
+                       tmp_r = TMP_REG3;
+               }
+
+               FAIL_IF(load_immediate(compiler, tmp_r, argw));
+               GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0);
+               return SLJIT_SUCCESS;
+       }
+
+       if (arg & OFFS_REG_MASK) { /* Base + shifted offset register: add them first, then transfer with a zero offset. */
+               SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
+               if (inp_flags & WRITE_BACK)
+                       tmp_r = arg & REG_MASK;
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
+               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
+               return SLJIT_SUCCESS;
+       }
+
+       imm = (sljit_uw)(argw - compiler->cache_argw); /* Distance from the address cached for the same arg. */
+       if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) {
+               SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
+               GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm);
+               return SLJIT_SUCCESS;
+       }
+       if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) {
+               SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
+               imm = (sljit_uw)-(sljit_sw)imm;
+               GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm);
+               return SLJIT_SUCCESS;
+       }
+
+       imm = get_imm(argw & ~max_delta); /* Split the offset: the high part is added, the low part goes into the transfer. */
+       if (imm) {
+               TEST_WRITE_BACK();
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm));
+               GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
+               return SLJIT_SUCCESS;
+       }
+
+       imm = get_imm(-argw & ~max_delta); /* The same split on the negated offset, using SUB. */
+       if (imm) {
+               argw = -argw;
+               TEST_WRITE_BACK();
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm));
+               GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
+               return SLJIT_SUCCESS;
+       }
+
+       if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { /* TMP_REG3 already holds this offset. */
+               TEST_WRITE_BACK();
+               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
+               return SLJIT_SUCCESS;
+       }
+
+       if (argw == next_argw && (next_arg & SLJIT_MEM)) { /* The next access uses the same offset: load it into TMP_REG3 and remember it. */
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+
+               TEST_WRITE_BACK();
+               EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
+               return SLJIT_SUCCESS;
+       }
+
+       imm = (sljit_uw)(argw - next_argw);
+       if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { /* Same base, nearby offset next: cache base + argw in TMP_REG3. */
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]));
+
+               compiler->cache_arg = arg;
+               compiler->cache_argw = argw;
+
+               GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0);
+               return SLJIT_SUCCESS;
+       }
+
+       if ((arg & REG_MASK) == tmp_r) { /* The base register must not be overwritten: the offset goes to TMP_REG3 and is recorded as cached. */
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+               tmp_r = TMP_REG3;
+       }
+
+       FAIL_IF(load_immediate(compiler, tmp_r, argw)); /* Generic case: offset in a register, register offset transfer. */
+       EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
+       return SLJIT_SUCCESS;
+}
+
+/* Transfers reg from / to the (arg, argw) operand. The single
+   instruction form is tried first; if that is not possible, the cache
+   is cleared and getput_arg is used without a following operand. */
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg, argw)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+               return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+       }
+
+       /* The fast path either emitted the access or recorded an error. */
+       return compiler->error;
+}
+
+/* Transfers reg from / to the (arg1, arg1w) operand. The cache state is
+   left untouched and (arg2, arg2w) is passed to getput_arg as the
+   following access when the single instruction form is not possible. */
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+
+       return compiler->error;
+}
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* Emits one operation: the sources that are neither registers nor
+          encodable immediates are loaded, emit_single_op is called, and
+          the result is stored if dst is a memory operand.
+          arg1 goes to TMP_REG1 or src reg
+          arg2 goes to TMP_REG2, imm or src reg
+          TMP_REG3 can be used for caching
+          result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+
+       /* We prefer registers and simple constants. */
+       sljit_si dst_r;
+       sljit_si src1_r;
+       sljit_si src2_r = 0;
+       sljit_si sugg_src2_r = TMP_REG2;
+       sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       /* Destination check. */
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) /* A move to nowhere from a non-memory source emits nothing. */
+                       return SLJIT_SUCCESS;
+               dst_r = TMP_REG2;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) /* Moves can load straight into the destination register. */
+                       sugg_src2_r = dst_r;
+       }
+       else {
+               SLJIT_ASSERT(dst & SLJIT_MEM);
+               if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) { /* ARG_TEST: only probes, nothing is emitted. */
+                       flags |= FAST_DEST;
+                       dst_r = TMP_REG2;
+               }
+               else {
+                       flags |= SLOW_DEST;
+                       dst_r = 0; /* Decided after the sources are processed. */
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1))
+               src1_r = src1;
+       else if (FAST_IS_REG(src2)) { /* Only src2 is a register: it becomes the first operand. */
+               flags |= ARGS_SWAPPED;
+               src1_r = src2;
+               src2 = src1;
+               src2w = src1w;
+       }
+       else do { /* do { } while(0) is used because of breaks. */
+               src1_r = 0;
+               if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
+                       /* The second check will generate a hit. */
+                       src2_r = get_imm(src1w);
+                       if (src2_r) {
+                               flags |= ARGS_SWAPPED;
+                               src1 = src2;
+                               src1w = src2w;
+                               break;
+                       }
+                       if (inp_flags & ALLOW_INV_IMM) {
+                               src2_r = get_imm(~src1w);
+                               if (src2_r) {
+                                       flags |= ARGS_SWAPPED | INV_IMM;
+                                       src1 = src2;
+                                       src1w = src2w;
+                                       break;
+                               }
+                       }
+                       if (GET_OPCODE(op) == SLJIT_ADD) { /* An ADD of a constant whose negation is encodable becomes a SUB. */
+                               src2_r = get_imm(-src1w);
+                               if (src2_r) {
+                                       /* Note: ARGS_SWAPPED is intentionally not applied! */
+                                       src1 = src2;
+                                       src1w = src2w;
+                                       op = SLJIT_SUB | GET_ALL_FLAGS(op);
+                                       break;
+                               }
+                       }
+               }
+
+               if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
+                       FAIL_IF(compiler->error);
+                       src1_r = TMP_REG1;
+               }
+       } while (0);
+
+       /* Source 2. */
+       if (src2_r == 0) {
+               if (FAST_IS_REG(src2)) {
+                       src2_r = src2;
+                       flags |= REG_SOURCE;
+                       if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) /* Move from a register to a non-register destination: use the source register as the result. */
+                               dst_r = src2_r;
+               }
+               else do { /* do { } while(0) is used because of breaks. */
+                       if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) {
+                               src2_r = get_imm(src2w);
+                               if (src2_r)
+                                       break;
+                               if (inp_flags & ALLOW_INV_IMM) {
+                                       src2_r = get_imm(~src2w);
+                                       if (src2_r) {
+                                               flags |= INV_IMM;
+                                               break;
+                                       }
+                               }
+                               if (GET_OPCODE(op) == SLJIT_ADD) { /* ADD of a constant whose negation is encodable: use SUB. */
+                                       src2_r = get_imm(-src2w);
+                                       if (src2_r) {
+                                               op = SLJIT_SUB | GET_ALL_FLAGS(op);
+                                               flags &= ~ARGS_SWAPPED;
+                                               break;
+                                       }
+                               }
+                               if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) { /* SUB of a constant whose negation is encodable: use ADD. */
+                                       src2_r = get_imm(-src2w);
+                                       if (src2_r) {
+                                               op = SLJIT_ADD | GET_ALL_FLAGS(op);
+                                               flags &= ~ARGS_SWAPPED;
+                                               break;
+                                       }
+                               }
+                       }
+
+                       /* src2_r is 0. */
+                       if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
+                               FAIL_IF(compiler->error);
+                               src2_r = sugg_src2_r;
+                       }
+               } while (0);
+       }
+
+       /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero.
+          If they are zero, they must not be registers. */
+       if (src1_r == 0 && src2_r == 0 && dst_r == 0) { /* All three operands need getput_arg. */
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { /* Load src2 first so that src1 is followed by dst. */
+                       SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
+                       flags |= ARGS_SWAPPED;
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+               }
+               src1_r = TMP_REG1;
+               src2_r = TMP_REG2;
+       }
+       else if (src1_r == 0 && src2_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+               src1_r = TMP_REG1;
+       }
+       else if (src1_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               src1_r = TMP_REG1;
+       }
+       else if (src2_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+               src2_r = sugg_src2_r;
+       }
+
+       if (dst_r == 0)
+               dst_r = TMP_REG2;
+
+       if (src1_r == 0) { /* Still unprocessed: load without a following operand. */
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
+               src1_r = TMP_REG1;
+       }
+
+       if (src2_r == 0) { /* Still unprocessed: load without a following operand. */
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
+               src2_r = sugg_src2_r;
+       }
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       if (flags & (FAST_DEST | SLOW_DEST)) { /* dst is a memory operand: store the result. */
+               if (flags & FAST_DEST)
+                       FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
+               else
+                       FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
+       }
+       return SLJIT_SUCCESS;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator); /* Call target for SLJIT_UDIV in sljit_emit_op0. */
+extern int __aeabi_idivmod(int numerator, int denominator); /* Call target for SLJIT_SDIV in sljit_emit_op0. */
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Emits one of the operand-less operations selected by GET_OPCODE(op):
+   a breakpoint, a no-op, a long multiply of the first two scratch registers,
+   or a division performed by calling the __aeabi_*divmod helpers.
+   Opcodes that are not listed in the switch emit nothing.
+   Returns SLJIT_SUCCESS, or the value of the failing emitter call
+   (EMIT_INSTRUCTION / FAIL_IF presumably return early on error). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       /* Only the opcode part of op is relevant from here on. */
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               EMIT_INSTRUCTION(BKPT);
+               break;
+       case SLJIT_NOP:
+               EMIT_INSTRUCTION(NOP);
+               break;
+       case SLJIT_UMUL:
+       case SLJIT_SMUL:
+               /* UMULL (unsigned) or SMULL (signed). SLJIT_SCRATCH_REG2 goes into the
+                  register field at bit 16 and SLJIT_SCRATCH_REG1 into the field at
+                  bit 12; the two multiplicands occupy the fields at bit 8 and bit 0. */
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+               return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_SCRATCH_REG2] << 16)
+                       | (reg_map[SLJIT_SCRATCH_REG1] << 12)
+                       | (reg_map[SLJIT_SCRATCH_REG1] << 8)
+                       | reg_map[SLJIT_SCRATCH_REG2]);
+#else
+               /* Without ARMv7 the second multiplicand is first copied to TMP_REG1 and
+                  the copy is used in the low register field.
+                  NOTE(review): presumably because older cores do not allow that
+                  operand to be one of the destination registers - confirm. */
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
+               return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_SCRATCH_REG2] << 16)
+                       | (reg_map[SLJIT_SCRATCH_REG1] << 12)
+                       | (reg_map[SLJIT_SCRATCH_REG1] << 8)
+                       | reg_map[TMP_REG1]);
+#endif
+       case SLJIT_UDIV:
+       case SLJIT_SDIV:
+               /* When at least three scratch registers are in use, r2 is pushed before
+                  the helper call and popped afterwards (see the encodings below). */
+               if (compiler->scratches >= 3)
+                       EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */);
+#if defined(__GNUC__)
+               /* Call the unsigned or signed divmod helper at its absolute address. */
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+                       (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+               if (compiler->scratches >= 3)
+                       return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */);
+               return SLJIT_SUCCESS;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOV_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOV_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOV_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_UI:
+       case SLJIT_MOVU_SI:
+       case SLJIT_MOVU_P:
+               return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOVU_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOVU_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOVU_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_NOT:
+               return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+               compiler->skip_checks = 1;
+#endif
+               return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
+
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a two-source operation by forwarding it to emit_op() with the
+   immediate policy that fits the opcode:
+     - MUL:                          no immediate flag,
+     - AND:                          ALLOW_ANY_IMM,
+     - ADD/ADDC/SUB/SUBC/OR/XOR:     ALLOW_IMM,
+     - SHL/LSHR/ASHR:                shift amount passed via compiler->shift_imm.
+   Opcodes that are not listed emit nothing and yield SLJIT_SUCCESS. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MUL:
+               return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+               return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_XOR:
+       case SLJIT_OR:
+       case SLJIT_SUBC:
+       case SLJIT_SUB:
+       case SLJIT_ADDC:
+       case SLJIT_ADD:
+               return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_ASHR:
+       case SLJIT_LSHR:
+       case SLJIT_SHL:
+               if (!(src2 & SLJIT_IMM)) {
+                       /* Shift amount lives in a register / memory operand: 0x20 marks
+                          "no immediate shift" for the emitter. */
+                       compiler->shift_imm = 0x20;
+                       return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
+               }
+
+               /* Constant shift: keep the low five bits and hand the value to be
+                  shifted over as the second source (TMP_REG1 is a placeholder). */
+               compiler->shift_imm = src2w & 0x1f;
+               return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
+
+       default:
+               break;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Returns the reg_map[] entry for the SLJIT register `reg`, i.e. the number
+   this backend uses for it when encoding instructions. The argument is
+   passed to check_sljit_get_register_index() first. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       check_sljit_get_register_index(reg);
+       return reg_map[reg];
+}
+
+/* Returns the index of the float register `reg`. No mapping table is
+   involved: after the argument check the value is returned unchanged. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       check_sljit_get_float_register_index(reg);
+       return reg;
+}
+
+/* Appends one caller supplied instruction word to the code stream.
+   instruction: points to the word to emit; it is read as a sljit_uw.
+   size:        must be 4 (asserted).
+   Returns the result of push_inst(). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       sljit_uw ins_word;
+
+       CHECK_ERROR();
+       check_sljit_emit_op_custom(compiler, instruction, size);
+       SLJIT_ASSERT(size == 4);
+
+       ins_word = *(sljit_uw*)instruction;
+       return push_inst(compiler, ins_word);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+/* FPU availability query. Two variants exist:
+     - SLJIT_CONFIG_ARM_V5: the answer is kept in the lazily initialised
+       variable arm_fpu_type (unless SLJIT_IS_FPU_AVAILABLE overrides it);
+     - otherwise: arm_fpu_type is the constant 1 and the FPU is always
+       reported as available. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+
+/* 0 - no fpu
+   1 - vfp
+   -1 (initial value) - not determined yet */
+static sljit_si arm_fpu_type = -1;
+
+/* Determines arm_fpu_type once; later calls return immediately. No real
+   detection is performed: the type is unconditionally set to 1 (vfp). */
+static void init_compiler(void)
+{
+       if (arm_fpu_type != -1)
+               return;
+
+       /* TODO: Only the OS can help to determine the correct fpu type. */
+       arm_fpu_type = 1;
+}
+
+/* Returns SLJIT_IS_FPU_AVAILABLE when that macro is defined; otherwise
+   returns arm_fpu_type, initialising it on first use. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#else
+       if (arm_fpu_type == -1)
+               init_compiler();
+       return arm_fpu_type;
+#endif
+}
+
+#else
+
+#define arm_fpu_type 1
+
+/* Non-ARMv5 configurations report the FPU unconditionally. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+       /* Always available. */
+       return 1;
+}
+
+#endif
+
+/* Flag bit accepted by emit_fop_mem(): it is ORed into the VSTR_F32 opcode
+   there, so it selects the load form of the transfer instruction. */
+#define FPU_LOAD (1 << 20)
+/* Builds a float load/store instruction word: `add` goes to bit 23 (add or
+   subtract the offset), the mapped base register to bit 16, the float
+   register to bit 12 and `offs` into the low bits. */
+#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
+       ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs))
+/* Builds a float arithmetic instruction word: dst at bit 12, src1 in the low
+   bits, src2 at bit 16; `mode` is ORed in unchanged. */
+#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
+       ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16))
+
+/* Emits a float load or store between float register `reg` and the memory
+   operand (arg, argw).
+   flags: only the SLJIT_SINGLE_OP and FPU_LOAD bits are used; they are ORed
+          into the VSTR_F32 base opcode.
+   TMP_REG1 serves as a scratch address register. TMP_REG3 holds the address
+   described by compiler->cache_arg / compiler->cache_argw and may be reused
+   or updated here.
+   Returns the result of the last push_inst(), or leaves early on an emitter
+   error (presumably through EMIT_INSTRUCTION / FAIL_IF). */
+static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_sw tmp;
+       sljit_uw imm;
+       sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD));
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* Base + index operand: add the two into TMP_REG1 (the low two bits of
+          argw are placed at bit 7 of the operand, i.e. used as the index shift)
+          and continue with the plain [TMP_REG1 + 0] form. */
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
+               arg = SLJIT_MEM | TMP_REG1;
+               argw = 0;
+       }
+
+       /* Fast loads and stores. */
+       /* The offset (or its negation) has no bits outside 0x3fc, so it is a
+          multiple of 4 in [0, 1020] and fits the instruction directly, scaled
+          down by 4. */
+       if ((arg & REG_MASK)) {
+               if (!(argw & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
+               if (!(-argw & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
+       }
+
+       /* Same operand as the cached one: address it relative to TMP_REG3, either
+          with an encodable delta or after adjusting TMP_REG3 by the delta. */
+       if (compiler->cache_arg == arg) {
+               tmp = argw - compiler->cache_argw;
+               if (!(tmp & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2));
+               if (!(-tmp & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2));
+               /* emit_set_delta() reports SLJIT_ERR_UNSUPPORTED when it cannot apply the
+                  delta; any other outcome means TMP_REG3 now tracks the new offset
+                  (errors are picked up from compiler->error). */
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
+               }
+       }
+
+       /* Base register with a larger offset: try to form the address in TMP_REG1
+          without touching the cache. */
+       if (arg & REG_MASK) {
+               if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
+               }
+               /* Split the offset: the part outside 0x3fc is added to (or subtracted
+                  from) the base as an immediate, the part inside 0x3fc goes into
+                  the transfer instruction. get_imm() yielding 0 means "no encoding". */
+               imm = get_imm(argw & ~0x3fc);
+               if (imm) {
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+               }
+               imm = get_imm(-argw & ~0x3fc);
+               if (imm) {
+                       argw = -argw;
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+               }
+       }
+
+       /* Slowest path: build the full address in TMP_REG3 and remember it in the
+          cache fields for following accesses. */
+       compiler->cache_arg = arg;
+       compiler->cache_argw = argw;
+       if (arg & REG_MASK) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]));
+       }
+       else
+               /* No base register: argw itself is the address. */
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
+}
+
+/* Emits a single-source float operation: compare (SLJIT_CMPD), move
+   (SLJIT_MOVD), negate (SLJIT_NEGD) or absolute value (SLJIT_ABSD).
+   Memory operands are loaded into / stored from temporary float registers
+   with emit_fop_mem(). The address cache is reset on entry.
+   Returns SLJIT_SUCCESS or leaves early on an emitter error. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_fr;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+       /* The SLJIT_SINGLE_OP bit of op is ORed straight into instruction words
+          below, so its value must be exactly 0x100. */
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       /* Invert the bit: from here on (op & SLJIT_SINGLE_OP) is set for operations
+          that were NOT marked single precision by the caller. */
+       op ^= SLJIT_SINGLE_OP;
+
+       if (GET_OPCODE(op) == SLJIT_CMPD) {
+               /* Compare: both operands are inputs, so a memory dst is loaded too. */
+               if (dst & SLJIT_MEM) {
+                       FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw));
+                       dst = TMP_FREG1;
+               }
+               if (src & SLJIT_MEM) {
+                       FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw));
+                       src = TMP_FREG2;
+               }
+               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, dst, src, 0));
+               /* NOTE(review): VMRS presumably copies the float status flags to the
+                  CPU flags so that conditional code can test them - confirm. */
+               EMIT_INSTRUCTION(VMRS);
+               return SLJIT_SUCCESS;
+       }
+
+       /* Compute into dst when it is a register, otherwise into TMP_FREG1. */
+       dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       /* A memory source is loaded directly into the result register. */
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_fr, src, srcw));
+               src = dst_fr;
+       }
+
+       switch (GET_OPCODE(op)) {
+               case SLJIT_MOVD:
+                       /* A register-to-register copy is only needed when both differ and the
+                          destination is a real register; a memory destination is stored
+                          straight from src below. */
+                       if (src != dst_fr && dst_fr != TMP_FREG1)
+                               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
+                       break;
+               case SLJIT_NEGD:
+                       EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
+                       break;
+               case SLJIT_ABSD:
+                       EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
+                       break;
+       }
+
+       /* Write the result back when dst was not a register. */
+       if (dst_fr == TMP_FREG1) {
+               if (GET_OPCODE(op) == SLJIT_MOVD)
+                       dst_fr = src;
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_fr, dst, dstw));
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a two-source float operation (SLJIT_ADDD, SLJIT_SUBD, SLJIT_MULD or
+   SLJIT_DIVD). Memory sources are loaded first - src2 into TMP_FREG2, then
+   src1 into TMP_FREG1 - and a memory destination is written from TMP_FREG1
+   after the operation. The address cache is reset on entry.
+   Returns SLJIT_SUCCESS or leaves early on an emitter error. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si result_fr;
+       sljit_si precision;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+       compiler->cache_argw = 0;
+       compiler->cache_arg = 0;
+
+       /* The inverted SLJIT_SINGLE_OP bit is what gets merged into every
+          instruction emitted below. */
+       op ^= SLJIT_SINGLE_OP;
+       precision = op & SLJIT_SINGLE_OP;
+
+       /* Work in dst itself when it is a register, in TMP_FREG1 otherwise. */
+       result_fr = TMP_FREG1;
+       if (FAST_IS_REG(dst))
+               result_fr = dst;
+
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, precision | FPU_LOAD, TMP_FREG2, src2, src2w));
+               src2 = TMP_FREG2;
+       }
+
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, precision | FPU_LOAD, TMP_FREG1, src1, src1w));
+               src1 = TMP_FREG1;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DIVD:
+               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F32, precision, result_fr, src2, src1));
+               break;
+
+       case SLJIT_MULD:
+               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F32, precision, result_fr, src2, src1));
+               break;
+
+       case SLJIT_SUBD:
+               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F32, precision, result_fr, src2, src1));
+               break;
+
+       case SLJIT_ADDD:
+               EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F32, precision, result_fr, src2, src1));
+               break;
+       }
+
+       /* Store the result when the destination was not a register. */
+       if (result_fr == TMP_FREG1)
+               FAIL_IF(emit_fop_mem(compiler, precision, TMP_FREG1, dst, dstw));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef FPU_LOAD
+#undef EMIT_FPU_DATA_TRANSFER
+#undef EMIT_FPU_OPERATION
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+/* Entry sequence of a "fast call" target: copies the value held in TMP_REG3
+   into dst (register or memory).
+   NOTE(review): TMP_REG3 presumably holds the return address left there by
+   the calling instruction - confirm against reg_map.
+   Nothing is emitted when dst is SLJIT_UNUSED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));
+
+       /* Memory. */
+       /* A non-zero result means the fast path dealt with the store (or failed);
+          compiler->error then carries the outcome. */
+       if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
+               return compiler->error;
+       /* TMP_REG3 is used for caching. */
+       /* So the value is moved to TMP_REG2 before the general (cache using)
+          store path runs with a cleared cache. */
+       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)));
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
+}
+
+/* Return sequence of a "fast call": brings the address given by src
+   (register, memory or immediate) into TMP_REG3 and then emits a BLX through
+   TMP_REG3.
+   Returns the result of the final push_inst(), or leaves early on an emitter
+   error. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)));
+       else if (src & SLJIT_MEM) {
+               /* Try the single instruction load straight into TMP_REG3 first. */
+               if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
+                       FAIL_IF(compiler->error);
+               else {
+                       /* General load: it goes through TMP_REG2 with a cleared cache and
+                          the value is moved to TMP_REG3 afterwards. */
+                       compiler->cache_arg = 0;
+                       compiler->cache_argw = 0;
+                       FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
+                       EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)));
+               }
+       }
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
+       return push_inst(compiler, BLX | RM(TMP_REG3));
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+/* Maps an SLJIT condition type to the ARM condition field, i.e. the value
+   that occupies the top four bits of an instruction word. Every type that
+   is not listed (SLJIT_JUMP in particular) maps to 0xe0000000. */
+static sljit_uw get_cc(sljit_si type)
+{
+       sljit_uw cond = 0xe0000000; /* SLJIT_JUMP and anything unlisted */
+
+       switch (type) {
+       case SLJIT_C_FLOAT_EQUAL:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+       case SLJIT_C_EQUAL:
+               cond = 0x00000000;
+               break;
+
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+       case SLJIT_C_MUL_OVERFLOW:
+       case SLJIT_C_NOT_EQUAL:
+               cond = 0x10000000;
+               break;
+
+       /* Unsigned and float orderings. */
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+       case SLJIT_C_GREATER_EQUAL:
+               cond = 0x20000000;
+               break;
+
+       case SLJIT_C_FLOAT_LESS:
+       case SLJIT_C_LESS:
+               cond = 0x30000000;
+               break;
+
+       case SLJIT_C_FLOAT_GREATER:
+       case SLJIT_C_GREATER:
+               cond = 0x80000000;
+               break;
+
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+       case SLJIT_C_LESS_EQUAL:
+               cond = 0x90000000;
+               break;
+
+       /* Signed orderings. */
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               cond = 0xa0000000;
+               break;
+
+       case SLJIT_C_SIG_LESS:
+               cond = 0xb0000000;
+               break;
+
+       case SLJIT_C_SIG_GREATER:
+               cond = 0xc0000000;
+               break;
+
+       case SLJIT_C_SIG_LESS_EQUAL:
+               cond = 0xd0000000;
+               break;
+
+       /* Overflow / unordered. */
+       case SLJIT_C_FLOAT_UNORDERED:
+       case SLJIT_C_OVERFLOW:
+               cond = 0x60000000;
+               break;
+
+       case SLJIT_C_FLOAT_ORDERED:
+       case SLJIT_C_NOT_OVERFLOW:
+               cond = 0x70000000;
+               break;
+       }
+
+       return cond;
+}
+
+/* Returns a label for the current code position. If the most recent label
+   was created at the same compiler->size, that label is returned again
+   instead of allocating a new one. A failed allocation ends the function
+   through PTR_FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *previous;
+       struct sljit_label *fresh;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       /* Nothing was emitted since the last label: reuse it. */
+       previous = compiler->last_label;
+       if (previous && previous->size == compiler->size)
+               return previous;
+
+       fresh = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!fresh);
+       set_label(fresh, compiler);
+       return fresh;
+}
+
+/* Emits a (possibly conditional) jump or call whose target is filled in
+   later, and returns the sljit_jump record that describes it.
+   type: condition / jump kind in the low 8 bits, optionally ORed with
+         SLJIT_REWRITABLE_JUMP. Values >= SLJIT_FAST_CALL are emitted as
+         calls and flagged IS_BL.
+   A failed allocation or emitter call ends the function through
+   PTR_FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       /* Drop the modifier bits; only the jump kind is needed below. */
+       type &= 0xff;
+
+       /* In ARM, we don't need to touch the arguments. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       if (type >= SLJIT_FAST_CALL)
+               PTR_FAIL_IF(prepare_blx(compiler));
+       /* Conditional literal load of the target: into TMP_PC for plain jumps,
+          into TMP_REG1 for calls (which are completed by emit_blx() below).
+          The literal value is a placeholder (0). */
+       PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
+               type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
+
+       /* Rewritable jumps record their address right after the load and are
+          counted in compiler->patches. */
+       if (jump->flags & SLJIT_REWRITABLE_JUMP) {
+               jump->addr = compiler->size;
+               compiler->patches++;
+       }
+
+       if (type >= SLJIT_FAST_CALL) {
+               jump->flags |= IS_BL;
+               PTR_FAIL_IF(emit_blx(compiler));
+       }
+
+       /* Other jumps record the address after the whole sequence. */
+       if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
+               jump->addr = compiler->size;
+#else
+       if (type >= SLJIT_FAST_CALL)
+               jump->flags |= IS_BL;
+       /* Load a placeholder target (0) into TMP_REG1, then branch through it
+          with BX (jump) or BLX (call) under the requested condition. */
+       PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
+       PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
+       jump->addr = compiler->size;
+#endif
+       return jump;
+}
+
+/* Emits an unconditional jump or call to the address given by src:
+     - register: BX / BLX through that register;
+     - memory:   the address is loaded into TMP_REG2 first;
+     - immediate: a sljit_jump record with JUMP_ADDR and target srcw is
+       created and a patchable jump sequence is emitted.
+   type <= SLJIT_JUMP selects a jump, type >= SLJIT_FAST_CALL a call.
+   Returns SLJIT_SUCCESS, the result of push_inst(), or leaves early on
+   error. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       /* In ARM, we don't need to touch the arguments. */
+       if (!(src & SLJIT_IMM)) {
+               if (FAST_IS_REG(src))
+                       return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
+
+               SLJIT_ASSERT(src & SLJIT_MEM);
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
+               return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
+       }
+
+       /* Immediate target: record it in a jump entry so it can be resolved when
+          the final code is generated. */
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       FAIL_IF(!jump);
+       set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
+       jump->u.target = srcw;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       if (type >= SLJIT_FAST_CALL)
+               FAIL_IF(prepare_blx(compiler));
+       /* Literal load into TMP_PC (jump) or TMP_REG1 (call, completed by
+          emit_blx()); the literal is a placeholder (0). */
+       FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
+       if (type >= SLJIT_FAST_CALL)
+               FAIL_IF(emit_blx(compiler));
+#else
+       FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
+       FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
+#endif
+       jump->addr = compiler->size;
+       return SLJIT_SUCCESS;
+}
+
+/* Materialises the condition `type` as 0 or 1 and either stores it in dst
+   (opcodes below SLJIT_ADD) or combines it with src using AND / OR / XOR.
+   The work is done with conditionally executed data-processing instructions
+   instead of branches. When SLJIT_SET_E is part of op, a flag-setting move
+   of the result follows on the combining paths.
+   Nothing is emitted when dst is SLJIT_UNUSED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si dst_r, flags = GET_ALL_FLAGS(op);
+       sljit_uw cc, ins;
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       cc = get_cc(type);
+       /* Compute in dst when it is a register, otherwise in TMP_REG2. */
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+
+       if (op < SLJIT_ADD) {
+               /* Plain store of the flag: dst_r = 0, then dst_r = 1 if cc holds. */
+               EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0));
+               EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
+               return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+       }
+
+       ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
+       /* In-place OR / XOR on a register: a single conditional "dst = dst OP 1"
+          is enough, since nothing has to change when the condition is false. */
+       if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
+               EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc);
+               /* The condition must always be set, even if the ORR/EOR is not executed above. */
+               return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
+       }
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       /* Bring a memory or immediate source into TMP_REG1. */
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       } else if (src & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       /* dst_r = src OP 1 when cc holds, dst_r = src OP 0 under the opposite
+          condition (cc with bit 28 flipped, matching the pairs in get_cc()). */
+       EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc);
+       EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000));
+       if (dst_r == TMP_REG2)
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));
+
+       return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS;
+}
+
+/* Emits code that loads init_value into dst and returns the sljit_const
+   record registered for that load (via set_const()).
+   The value is loaded into dst when SLOW_IS_REG(dst) holds, otherwise into
+   TMP_REG2, which is then stored when dst is a memory operand.
+   A failed allocation or emitter call ends the function through
+   PTR_FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       /* ARMv5: the value comes from a dedicated literal, loaded relative to
+          TMP_PC; the load is counted in compiler->patches. */
+       PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value));
+       compiler->patches++;
+#else
+       /* Otherwise the value is built by emit_imm(). */
+       PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
+#endif
+       set_const(const_, compiler);
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+       return const_;
+}
+
+/* Public wrapper: forwards to inline_set_jump_addr() to retarget the jump
+   at `addr` to `new_addr`.
+   NOTE(review): the trailing 1 presumably asks the helper to flush the
+   instruction cache - confirm against inline_set_jump_addr(). */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       inline_set_jump_addr(addr, new_addr, 1);
+}
+
+/* Public wrapper: forwards to inline_set_const() to replace the constant
+   at `addr` with `new_constant`.
+   NOTE(review): the trailing 1 presumably asks the helper to flush the
+   instruction cache - confirm against inline_set_const(). */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       inline_set_const(addr, new_constant, 1);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c
new file mode 100644 (file)
index 0000000..cfd1a38
--- /dev/null
@@ -0,0 +1,1902 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Returns the platform identification string: the literal "ARM-64"
+   immediately followed by whatever SLJIT_CPUINFO expands to. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "ARM-64" SLJIT_CPUINFO;
+}
+
+/* Type of one encoded instruction word. */
+typedef sljit_ui sljit_ins;
+
+/* Index into reg_map whose entry is 31 (see the table below). */
+#define TMP_ZERO       0
+
+/* Indices of the back end's private registers; they follow the
+   SLJIT_NO_REGISTERS public register indices in reg_map. */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
+#define TMP_REG4       (SLJIT_NO_REGISTERS + 4)
+#define TMP_LR         (SLJIT_NO_REGISTERS + 5)
+#define TMP_SP         (SLJIT_NO_REGISTERS + 6)
+
+/* Temporary floating point registers. They are used as-is by the
+   VD/VT/VN/VM macros, without going through a mapping table. */
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_FLOAT_REG6 + 1)
+
+/* Maps a register index to the 5 bit register number placed into an
+   instruction (see RD/RN/RM). The initializer has 18 entries, so it
+   presumably assumes SLJIT_NO_REGISTERS == 11, which would make
+   TMP_REG1..TMP_REG4 map to 9..12, TMP_LR to 30 and TMP_SP to 31
+   -- TODO confirm against sljitLir.h. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
+  31, 0, 1, 2, 3, 4, 19, 20, 21, 22, 23, 29, 9, 10, 11, 12, 30, 31
+};
+
+/* Bit 31 of an instruction word. XORing one of the 64 bit opcodes with it
+   (as load_immediate does for MOVN and ORRI) yields the 32 bit form.
+   The constant is built from an unsigned one because shifting a signed 1
+   into the sign bit is undefined behaviour in C. */
+#define W_OP ((sljit_ins)1 << 31)
+/* Register operand fields: the register index is translated through
+   reg_map and shifted to the position of the field. */
+#define RD(rd) (reg_map[rd])
+#define RT(rt) (reg_map[rt])
+#define RN(rn) (reg_map[rn] << 5)
+#define RT2(rt2) (reg_map[rt2] << 10)
+#define RM(rm) (reg_map[rm] << 16)
+/* Floating point register operand fields: the number is used directly. */
+#define VD(vd) (vd)
+#define VT(vt) (vt)
+#define VN(vn) ((vn) << 5)
+#define VM(vm) ((vm) << 16)
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* Base encodings of the instructions emitted by this back end. Operand
+   fields are left zero; the emitters OR in RD()/RN()/RM() and immediates.
+   Where a 32 bit variant is needed the emitters XOR the opcode with
+   bit 31 (W_OP / inv_bits). Names ending in I are the immediate forms
+   (e.g. ADDI, ANDI) of the corresponding register instructions. */
+#define ADC 0x9a000000
+#define ADD 0x8b000000
+#define ADDI 0x91000000
+#define AND 0x8a000000
+#define ANDI 0x92000000
+#define ASRV 0x9ac02800
+#define B 0x14000000
+#define B_CC 0x54000000
+#define BL 0x94000000
+#define BLR 0xd63f0000
+#define BR 0xd61f0000
+#define BRK 0xd4200000
+#define CBZ 0xb4000000
+#define CLZ 0xdac01000
+#define CSINC 0x9a800400
+#define EOR 0xca000000
+#define EORI 0xd2000000
+#define FABS 0x1e60c000
+#define FADD 0x1e602800
+#define FCMP 0x1e602000
+#define FDIV 0x1e601800
+#define FMOV 0x1e604000
+#define FMUL 0x1e600800
+#define FNEG 0x1e614000
+#define FSUB 0x1e603800
+#define LDRI 0xf9400000
+#define LDP 0xa9400000
+#define LDP_PST 0xa8c00000
+#define LSLV 0x9ac02000
+#define LSRV 0x9ac02400
+#define MADD 0x9b000000
+#define MOVK 0xf2800000
+#define MOVN 0x92800000
+#define MOVZ 0xd2800000
+#define NOP 0xd503201f
+#define ORN 0xaa200000
+#define ORR 0xaa000000
+#define ORRI 0xb2000000
+#define RET 0xd65f0000
+#define SBC 0xda000000
+#define SBFM 0x93000000
+#define SDIV 0x9ac00c00
+#define SMADDL 0x9b200000
+#define SMULH 0x9b403c00
+#define STP 0xa9000000
+#define STP_PRE 0xa9800000
+#define STRI 0xf9000000
+#define STR_FI 0x3d000000
+#define STR_FR 0x3c206800
+#define STUR_FI 0x3c000000
+#define SUB 0xcb000000
+#define SUBI 0xd1000000
+#define SUBS 0xeb000000
+#define UBFM 0xd3000000
+#define UDIV 0x9ac00800
+#define UMULH 0x9bc03c00
+
+/* Appends one instruction word to the compiler's instruction buffer and
+   increments compiler->size. Leaves through FAIL_IF when ensure_buf
+   cannot provide space, otherwise returns SLJIT_SUCCESS. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *slot;
+
+       slot = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!slot);
+
+       compiler->size++;
+       *slot = ins;
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a fixed length, four instruction sequence (MOVZ followed by three
+   MOVK) that loads the 64 bit value imm into dst, 16 bits at a time starting
+   with the lowest halfword. The length never depends on imm, which is what
+   allows modify_imm64_const to rewrite the value in place. */
+static SLJIT_INLINE sljit_si emit_imm64_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
+{
+       sljit_si hw;
+
+       FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
+
+       /* Halfwords 1 and 2; the halfword index goes into bits 21-22. */
+       for (hw = 1; hw < 3; hw++) {
+               FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> (hw * 16)) & 0xffff) << 5) | (hw << 21)));
+       }
+
+       /* The top halfword needs no masking after the shift. */
+       return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21));
+}
+
+/* Rewrites, in place, the four instruction MOVZ/MOVK sequence starting at
+   inst so that it loads new_imm. The destination register number is taken
+   from the low 5 bits of the first instruction. */
+static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
+{
+       sljit_si rd = inst[0] & 0x1f;
+       sljit_si hw;
+
+       SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
+
+       inst[0] = MOVZ | rd | ((new_imm & 0xffff) << 5);
+       /* One MOVK per remaining halfword; its index goes into bits 21-22. */
+       for (hw = 1; hw < 4; hw++)
+               inst[hw] = MOVK | rd | (((new_imm >> (hw * 16)) & 0xffff) << 5) | (hw << 21);
+}
+
+/* Chooses the shortest encoding for one jump while the code is being laid
+   out. code_ptr points at the word just copied for this jump; the words at
+   code_ptr[-4..-1] are the address load and, for IS_COND jumps, code_ptr[-5]
+   is the preceding conditional branch. The function records the chosen form
+   in jump->flags, patches the already copied words where needed, and returns
+   the number of instruction words the caller removes from the output
+   (sljit_generate_code subtracts the result from code_ptr). */
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+
+       /* Rewritable jumps must be able to hold any address later on,
+          so they always keep the full 64 bit form. */
+       if (jump->flags & SLJIT_REWRITABLE_JUMP) {
+               jump->flags |= PATCH_ABS64;
+               return 0;
+       }
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               /* label->size is an instruction index relative to code. */
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+       /* Byte distance from the word 4 instructions after code_ptr.
+          NOTE(review): the branch is finally placed at jump->addr, which the
+          caller sets to code_ptr - 4, so this estimate is biased by 8 words
+          -- confirm the range checks below are still safe at the limits. */
+       diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4);
+
+       if (jump->flags & IS_COND) {
+               diff += sizeof(sljit_ins);
+               if (diff <= 0xfffff && diff >= -0x100000) {
+                       /* Target is reachable by the conditional branch itself:
+                          flip its condition (bit 24 for the CBZ form, bit 0
+                          otherwise) and drop all five following words. */
+                       code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
+                       jump->addr -= sizeof(sljit_ins);
+                       jump->flags |= PATCH_COND;
+                       return 5;
+               }
+               diff -= sizeof(sljit_ins);
+       }
+
+       /* Single B / BL instruction. */
+       if (diff <= 0x7ffffff && diff >= -0x8000000) {
+               jump->flags |= PATCH_B;
+               return 4;
+       }
+
+       /* 32 bit absolute address: two load instructions are enough. The
+          branch word is moved down by two and the skip distance encoded in
+          the conditional branch is reduced accordingly. */
+       if (target_addr <= 0xffffffffl) {
+               if (jump->flags & IS_COND)
+                       code_ptr[-5] -= (2 << 5);
+               code_ptr[-2] = code_ptr[0];
+               return 2;
+       }
+       /* 48 bit absolute address: three load instructions. */
+       if (target_addr <= 0xffffffffffffl) {
+               if (jump->flags & IS_COND)
+                       code_ptr[-5] -= (1 << 5);
+               jump->flags |= PATCH_ABS48;
+               code_ptr[-1] = code_ptr[0];
+               return 1;
+       }
+
+       /* Full 64 bit address: nothing can be removed. */
+       jump->flags |= PATCH_ABS64;
+       return 0;
+}
+
+/* Produces the final machine code from the instructions collected in the
+   compiler. Works in two passes: the first copies the instruction words into
+   freshly allocated executable memory while resolving label addresses,
+   shortening jumps (detect_jump_type) and recording constant addresses; the
+   second patches every jump with its final target. On success the compiler
+   is marked SLJIT_ERR_COMPILED, executable_size is set and the start of the
+   code is returned. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+       sljit_si dst;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       /* compiler->size counts instruction words (see push_inst). */
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       /* code_ptr is the output position, word_count the index of the
+          instruction in the original (unshortened) stream. */
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       /* These structures are ordered by their address. */
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       if (label && label->size == word_count) {
+                               /* From here on label->size is the index in the final code. */
+                               label->addr = (sljit_uw)code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+                                       /* jump->addr becomes the address of the first word of
+                                          the address load; unneeded words are then removed. */
+                                       jump->addr = (sljit_uw)(code_ptr - 4);
+                                       code_ptr -= detect_jump_type(jump, code_ptr, code);
+                                       jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr ++;
+                       word_count ++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       /* A label may be placed after the very last instruction. */
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       /* Second pass: all label addresses are known, patch the jumps. */
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+                       if (jump->flags & PATCH_B) {
+                               /* Word offset stored in the 26 bit field of B / BL. */
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
+                               buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
+                               /* Four words were removed after the conditional branch,
+                                  so its skip distance shrinks by four. */
+                               if (jump->flags & IS_COND)
+                                       buf_ptr[-1] -= (4 << 5);
+                               break;
+                       }
+                       if (jump->flags & PATCH_COND) {
+                               /* Word offset stored in the 19 bit field at bit 5. */
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
+                               buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
+                               break;
+                       }
+
+                       SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl);
+                       SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl);
+
+                       /* Absolute address: rewrite as many MOVZ/MOVK words as
+                          detect_jump_type kept (2, 3 or 4). */
+                       dst = buf_ptr[0] & 0x1f;
+                       buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5);
+                       buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21);
+                       if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
+                               buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21);
+                       if (jump->flags & PATCH_ABS64)
+                               buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21);
+               } while (0);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       return code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Core code generator functions.                                       */
+/* --------------------------------------------------------------------- */
+
+#define COUNT_TRAILING_ZERO(value, result) \
+       result = 0; \
+       if (!(value & 0xffffffff)) { \
+               result += 32; \
+               value >>= 32; \
+       } \
+       if (!(value & 0xffff)) { \
+               result += 16; \
+               value >>= 16; \
+       } \
+       if (!(value & 0xff)) { \
+               result += 8; \
+               value >>= 8; \
+       } \
+       if (!(value & 0xf)) { \
+               result += 4; \
+               value >>= 4; \
+       } \
+       if (!(value & 0x3)) { \
+               result += 2; \
+               value >>= 2; \
+       } \
+       if (!(value & 0x1)) { \
+               result += 1; \
+               value >>= 1; \
+       }
+
+/* Flag ORed into the len argument of logical_imm: the caller has not
+   excluded the all-zeros / all-ones values, so logical_imm checks for them
+   and returns 0 instead of asserting. */
+#define LOGICAL_IMM_CHECK 0x100
+
+/* Tries to encode imm as the bitmask immediate of a logical instruction
+   (ANDI / ORRI / EORI). len is HALF of the operand width: 32 for 64 bit
+   operations and 16 for 32 bit ones, optionally ORed with LOGICAL_IMM_CHECK.
+   Returns the immediate fields (bits 10-22) to be ORed into the opcode, or 0
+   when the value cannot be represented. */
+static sljit_ins logical_imm(sljit_sw imm, sljit_si len)
+{
+       sljit_si negated, ones, right;
+       sljit_uw mask, uimm;
+       sljit_ins ins;
+
+       if (len & LOGICAL_IMM_CHECK) {
+               len &= ~LOGICAL_IMM_CHECK;
+               if (len == 32 && (imm == 0 || imm == -1))
+                       return 0;
+               if (len == 16 && ((sljit_si)imm == 0 || (sljit_si)imm == -1))
+                       return 0;
+       }
+
+       SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
+               || (len == 16 && (sljit_si)imm != 0 && (sljit_si)imm != -1));
+       uimm = (sljit_uw)imm;
+       /* Find the smallest repeating element: keep halving while the two
+          neighbouring len bit wide chunks at the bottom are identical. */
+       while (1) {
+               if (len <= 0) {
+                       SLJIT_ASSERT_STOP();
+                       return 0;
+               }
+               mask = ((sljit_uw)1 << len) - 1;
+               if ((uimm & mask) != ((uimm >> len) & mask))
+                       break;
+               len >>= 1;
+       }
+
+       /* len is now the size of the repeating element. */
+       len <<= 1;
+
+       /* Work on a pattern whose lowest bit is clear; remember the inversion. */
+       negated = 0;
+       if (uimm & 0x1) {
+               negated = 1;
+               uimm = ~uimm;
+       }
+
+       if (len < 64)
+               uimm &= ((sljit_uw)1 << len) - 1;
+
+       /* Unsigned right shift. */
+       COUNT_TRAILING_ZERO(uimm, right);
+
+       /* Signed shift. We also know that the highest bit is set. */
+       imm = (sljit_sw)~uimm;
+       SLJIT_ASSERT(imm < 0);
+
+       COUNT_TRAILING_ZERO(imm, ones);
+
+       /* Anything left besides sign bits means the element held more than
+          one run of ones, which cannot be encoded. */
+       if (~imm)
+               return 0;
+
+       /* Element size marker: bit 22 for 64 bit elements, otherwise a
+          prefix in the upper bits of the field at bit 10. */
+       if (len == 64)
+               ins = 1 << 22;
+       else
+               ins = (0x3f - ((len << 1) - 1)) << 10;
+
+       /* Run length goes into the field at bit 10, rotation into bit 16. */
+       if (negated)
+               return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);
+
+       return ins | ((ones - 1) << 10) | ((len - right) << 16);
+}
+
+#undef COUNT_TRAILING_ZERO
+
+/* Loads the constant simm into dst using as few instructions as the cheap
+   checks below can find: a single MOVZ / MOVN / ORR-immediate when possible,
+   two instructions for values that fit into 32 bits (zero or one extended),
+   and otherwise one MOVZ or MOVN followed by a MOVK for every remaining
+   16 bit chunk that differs from the background. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw simm)
+{
+       sljit_uw imm = (sljit_uw)simm;
+       sljit_si i, zeros, ones, first;
+       sljit_ins bitmask;
+
+       /* 16 bit unsigned value. */
+       if (imm <= 0xffff)
+               return push_inst(compiler, MOVZ | RD(dst) | (imm << 5));
+
+       /* Small negative value: inverted 16 bit immediate. */
+       if (simm >= -0x10000 && simm < 0)
+               return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5));
+
+       if (imm <= 0xffffffffl) {
+               /* 32 bit forms (opcode XOR W_OP): one halfword is 0xffff. */
+               if ((imm & 0xffff0000l) == 0xffff0000)
+                       return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5));
+               if ((imm & 0xffff) == 0xffff)
+                       return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+               bitmask = logical_imm(simm, 16);
+               if (bitmask != 0)
+                       return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);
+       }
+       else {
+               bitmask = logical_imm(simm, 32);
+               if (bitmask != 0)
+                       return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
+       }
+
+       /* Two instructions: upper 32 bits are all zero... */
+       if (imm <= 0xffffffffl) {
+               FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
+               return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+       }
+
+       /* ...or all one. */
+       if (simm >= -0x100000000l && simm < 0) {
+               FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)));
+               return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+       }
+
+       /* A large number of further constants could be constructed from an
+          ORR and a MOVx, but finding them is costly, so no attempt is made
+          here. The generic MOVZ / MOVN + MOVK sequence is used instead. */
+
+       /* Count the 16 bit chunks that are all zeros and all ones. */
+       zeros = 0;
+       ones = 0;
+       for (i = 4; i > 0; i--) {
+               if ((simm & 0xffff) == 0)
+                       zeros++;
+               if ((simm & 0xffff) == 0xffff)
+                       ones++;
+               simm >>= 16;
+       }
+
+       simm = (sljit_sw)imm;
+       first = 1;
+       if (ones > zeros) {
+               /* All-ones background: work on the inverted value so that the
+                  chunks to be skipped are the zero ones. MOVN takes the
+                  inverted chunk, the following MOVKs the original one. */
+               simm = ~simm;
+               for (i = 0; i < 4; i++) {
+                       if (!(simm & 0xffff)) {
+                               simm >>= 16;
+                               continue;
+                       }
+                       if (first) {
+                               first = 0;
+                               FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
+                       }
+                       else
+                               FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21)));
+                       simm >>= 16;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* All-zeros background: MOVZ for the first non-zero chunk, MOVK for
+          the others. */
+       for (i = 0; i < 4; i++) {
+               if (!(simm & 0xffff)) {
+                       simm >>= 16;
+                       continue;
+               }
+               if (first) {
+                       first = 0;
+                       FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
+               }
+               else
+                       FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
+               simm >>= 16;
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Modifier bits of the flags argument of emit_op_imm. The low 16 bits of
+   flags hold the operation code (emit_op_imm extracts it as flags & 0xffff). */
+#define ARG1_IMM       0x0010000
+#define ARG2_IMM       0x0020000
+#define INT_OP         0x0040000
+#define SET_FLAGS      0x0080000
+#define UNUSED_RETURN  0x0100000
+#define SLOW_DEST      0x0200000
+#define SLOW_SRC1      0x0400000
+#define SLOW_SRC2      0x0800000
+
+/* Helper for emit_op_imm; uses its locals flags, inv_bits and dst. When
+   flags are requested, flag_bits is added to the bits XORed into the opcode
+   and, if the result itself is unused, the destination is redirected to
+   TMP_ZERO. */
+#define CHECK_FLAGS(flag_bits) \
+       if (flags & SET_FLAGS) { \
+               inv_bits |= flag_bits; \
+               if (flags & UNUSED_RETURN) \
+                       dst = TMP_ZERO; \
+       }
+
+/* Emits the instruction(s) for one operation on register and/or immediate
+   operands.
+
+   flags: operation code in the low 16 bits plus ARG1_IMM / ARG2_IMM (the
+          argument is an immediate value, not a register index), INT_OP
+          (32 bit operation), SET_FLAGS and UNUSED_RETURN.
+   dst:   destination register index.
+   arg1, arg2: register index or immediate value, depending on flags.
+
+   An immediate is encoded directly when the instruction has a suitable
+   form; otherwise it is loaded into TMP_REG1 / TMP_REG2 (or replaced by
+   TMP_ZERO when it is 0) and the register form is used.
+
+   Bit 31 is always written as ((sljit_ins)1 << 31): shifting a signed 1
+   into the sign bit is undefined behaviour in C. */
+static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_sw arg1, sljit_sw arg2)
+{
+       /* dst must be register, TMP_REG1
+          arg1 must be register, TMP_REG1, imm
+          arg2 must be register, TMP_REG2, imm */
+       /* Bits XORed into the 64 bit opcode: bit 31 selects the 32 bit form. */
+       sljit_ins inv_bits = (flags & INT_OP) ? ((sljit_ins)1 << 31) : 0;
+       sljit_ins inst_bits;
+       sljit_si op = (flags & 0xffff);
+       sljit_si reg;
+       sljit_sw imm, nimm;
+
+       if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
+               /* Both are immediates. */
+               flags &= ~ARG1_IMM;
+               if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
+                       arg1 = TMP_ZERO;
+               else {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+                       arg1 = TMP_REG1;
+               }
+       }
+
+       if (flags & (ARG1_IMM | ARG2_IMM)) {
+               /* Exactly one immediate is left: split into register / value. */
+               reg = (flags & ARG2_IMM) ? arg1 : arg2;
+               imm = (flags & ARG2_IMM) ? arg2 : arg1;
+
+               switch (op) {
+               case SLJIT_MUL:
+               case SLJIT_NEG:
+               case SLJIT_CLZ:
+               case SLJIT_ADDC:
+               case SLJIT_SUBC:
+                       /* No form with immediate operand (except imm 0, which
+                       is represented by a ZERO register). */
+                       break;
+               case SLJIT_MOV:
+                       SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
+                       return load_immediate(compiler, dst, imm);
+               case SLJIT_NOT:
+                       SLJIT_ASSERT(flags & ARG2_IMM);
+                       FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
+                       goto set_flags;
+               case SLJIT_SUB:
+                       if (flags & ARG1_IMM)
+                               break;
+                       /* reg - imm is handled as reg + (-imm). */
+                       imm = -imm;
+                       /* Fall through. */
+               case SLJIT_ADD:
+                       if (imm == 0) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
+                       }
+                       /* 12 bit immediate. */
+                       if (imm > 0 && imm <= 0xfff) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10));
+                       }
+                       nimm = -imm;
+                       if (nimm > 0 && nimm <= 0xfff) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10));
+                       }
+                       /* 12 bit immediate shifted left by 12 (bit 22). */
+                       if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22));
+                       }
+                       if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22));
+                       }
+                       /* 24 bit immediate as two instructions; only usable
+                          when no flags have to be produced. */
+                       if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
+                               FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)));
+                               return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10));
+                       }
+                       if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
+                               FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)));
+                               return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10));
+                       }
+                       break;
+               case SLJIT_AND:
+                       inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
+                       if (!inst_bits)
+                               break;
+                       CHECK_FLAGS(3 << 29);
+                       return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
+               case SLJIT_OR:
+               case SLJIT_XOR:
+                       inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
+                       if (!inst_bits)
+                               break;
+                       if (op == SLJIT_OR)
+                               inst_bits |= ORRI;
+                       else
+                               inst_bits |= EORI;
+                       FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
+                       goto set_flags;
+               case SLJIT_SHL:
+                       if (flags & ARG1_IMM)
+                               break;
+                       /* Left shift by a constant is a UBFM with rotated fields. */
+                       if (flags & INT_OP) {
+                               imm &= 0x1f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10)));
+                       }
+                       else {
+                               imm &= 0x3f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10)));
+                       }
+                       goto set_flags;
+               case SLJIT_LSHR:
+               case SLJIT_ASHR:
+                       if (flags & ARG1_IMM)
+                               break;
+                       /* Flipping bit 30 turns the UBFM opcode into SBFM.
+                          NOTE(review): the bit stays set in inv_bits, so the
+                          instruction emitted at set_flags is SUBS ^ bit 30
+                          rather than SUBS -- confirm only the zero / sign
+                          flags are consumed afterwards. */
+                       if (op == SLJIT_ASHR)
+                               inv_bits |= 1 << 30;
+                       if (flags & INT_OP) {
+                               imm &= 0x1f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10)));
+                       }
+                       else {
+                               imm &= 0x3f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10)));
+                       }
+                       goto set_flags;
+               default:
+                       SLJIT_ASSERT_STOP();
+                       break;
+               }
+
+               /* No immediate form was usable: move the value into a
+                  temporary register (or use the zero register). */
+               if (flags & ARG2_IMM) {
+                       if (arg2 == 0)
+                               arg2 = TMP_ZERO;
+                       else {
+                               FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
+                               arg2 = TMP_REG2;
+                       }
+               }
+               else {
+                       if (arg1 == 0)
+                               arg1 = TMP_ZERO;
+                       else {
+                               FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+                               arg1 = TMP_REG1;
+                       }
+               }
+       }
+
+       /* Both arguments are registers. */
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+       case SLJIT_MOV_UB:
+       case SLJIT_MOVU_UB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               /* Zero extension always uses the 32 bit form. */
+               return push_inst(compiler, (UBFM ^ ((sljit_ins)1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
+       case SLJIT_MOV_SB:
+       case SLJIT_MOVU_SB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (!(flags & INT_OP))
+                       inv_bits |= 1 << 22;
+               return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
+       case SLJIT_MOV_UH:
+       case SLJIT_MOVU_UH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               return push_inst(compiler, (UBFM ^ ((sljit_ins)1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
+       case SLJIT_MOV_SH:
+       case SLJIT_MOVU_SH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (!(flags & INT_OP))
+                       inv_bits |= 1 << 22;
+               return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
+       case SLJIT_MOV_UI:
+       case SLJIT_MOVU_UI:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if ((flags & INT_OP) && dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst(compiler, (ORR ^ ((sljit_ins)1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+       case SLJIT_MOV_SI:
+       case SLJIT_MOVU_SI:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if ((flags & INT_OP) && dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
+       case SLJIT_NOT:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_NEG:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               if (flags & SET_FLAGS)
+                       inv_bits |= 1 << 29;
+               return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
+               goto set_flags;
+       case SLJIT_ADD:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_ADDC:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_SUB:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_SUBC:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_MUL:
+               if (!(flags & SET_FLAGS))
+                       return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
+               /* With flags: additionally compare the upper part of the
+                  product with the sign extension of the lower part, using
+                  TMP_REG4 as scratch. */
+               if (flags & INT_OP) {
+                       FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
+                       FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG4) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
+                       return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_REG4) | RM(dst) | (2 << 22) | (63 << 10));
+               }
+               FAIL_IF(push_inst(compiler, SMULH | RD(TMP_REG4) | RN(arg1) | RM(arg2)));
+               FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
+               return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_REG4) | RM(dst) | (2 << 22) | (63 << 10));
+       case SLJIT_AND:
+               CHECK_FLAGS(3 << 29);
+               return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_OR:
+               FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_XOR:
+               FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_SHL:
+               FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_LSHR:
+               FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_ASHR:
+               FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+
+set_flags:
+       /* The operation itself has no flag setting form: compare the result
+          with the zero register afterwards. */
+       if (flags & SET_FLAGS)
+               return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
+       return SLJIT_SUCCESS;
+}
+
+#define STORE          0x01
+#define SIGNED         0x02
+
+#define UPDATE         0x04
+#define ARG_TEST       0x08
+
+#define BYTE_SIZE      0x000
+#define HALF_SIZE      0x100
+#define INT_SIZE       0x200
+#define WORD_SIZE      0x300
+
+#define MEM_SIZE_SHIFT(flags) ((flags) >> 8)
+
+static SLJIT_CONST sljit_ins sljit_mem_imm[4] = { /* Byte sized opcodes for [reg, imm] addressing with a non-negative, size scaled 12 bit offset. Indexed by flags & 0x3 (bit 0: STORE, bit 1: SIGNED); users OR in (shift << 30) for the access size. */
+/* u l */ 0x39400000 /* ldrb [reg,imm] */,
+/* u s */ 0x39000000 /* strb [reg,imm] */,
+/* s l */ 0x39800000 /* ldrsb [reg,imm] */,
+/* s s */ 0x39000000 /* strb [reg,imm] */,
+};
+
+static SLJIT_CONST sljit_ins sljit_mem_simm[4] = { /* Byte sized opcodes for [reg, imm] addressing with a signed 9 bit offset (-256..255, placed at bit 12). Indexed by flags & 0x3 (bit 0: STORE, bit 1: SIGNED); users OR in (shift << 30) for the access size. */
+/* u l */ 0x38400000 /* ldurb [reg,imm] */,
+/* u s */ 0x38000000 /* sturb [reg,imm] */,
+/* s l */ 0x38800000 /* ldursb [reg,imm] */,
+/* s s */ 0x38000000 /* sturb [reg,imm] */,
+};
+
+static SLJIT_CONST sljit_ins sljit_mem_pre_simm[4] = { /* Byte sized opcodes for the write back form [reg, imm]! with a signed 9 bit offset; used for UPDATE accesses. Indexed by flags & 0x3 (bit 0: STORE, bit 1: SIGNED); users OR in (shift << 30) for the access size. */
+/* u l */ 0x38400c00 /* ldrb [reg,imm]! */,
+/* u s */ 0x38000c00 /* strb [reg,imm]! */,
+/* s l */ 0x38800c00 /* ldrsb [reg,imm]! */,
+/* s s */ 0x38000c00 /* strb [reg,imm]! */,
+};
+
+static SLJIT_CONST sljit_ins sljit_mem_reg[4] = { /* Byte sized opcodes for [reg, reg] addressing; users set bit 12 when the offset register is shifted by the access size. Indexed by flags & 0x3 (bit 0: STORE, bit 1: SIGNED); users OR in (shift << 30) for the access size. */
+/* u l */ 0x38606800 /* ldrb [reg,reg] */,
+/* u s */ 0x38206800 /* strb [reg,reg] */,
+/* s l */ 0x38a06800 /* ldrsb [reg,reg] */,
+/* s s */ 0x38206800 /* strb [reg,reg] */,
+};
+
+/* Helper function: emits dst = reg + value using exactly one add/subtract
+   immediate instruction; the flags are not set. When value cannot be encoded
+   as a 12 bit immediate (optionally shifted left by 12 bits), nothing is
+   emitted and SLJIT_ERR_UNSUPPORTED is returned. */
+static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
+{
+       sljit_ins opcode = ADDI;
+
+       /* A negative delta is a subtraction of its magnitude. */
+       if (value < 0) {
+               opcode = SUBI;
+               value = -value;
+       }
+
+       /* Plain 12 bit immediate. */
+       if (value <= 0xfff)
+               return push_inst(compiler, opcode | RD(dst) | RN(reg) | (value << 10));
+
+       /* 12 bit immediate shifted left by 12: the low 12 bits must be zero,
+          so (value >> 2) lands the upper part exactly at bit 10. */
+       if (value <= 0xffffff && !(value & 0xfff))
+               return push_inst(compiler, opcode | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
+
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Can perform an operation using at most 1 instruction.
+   Returns 0 when a single instruction is not enough (nothing is emitted),
+   1 when ARG_TEST is set and a single instruction would be enough (nothing
+   is emitted), and -1 once the instruction has been pushed. If push_inst
+   fails, FAIL_IF leaves the function early (presumably with the compiler
+   error code - confirm against the FAIL_IF definition). */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_ui shift = MEM_SIZE_SHIFT(flags); /* 0..3, selects the access size */
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (SLJIT_UNLIKELY(flags & UPDATE)) {
+               if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) { /* base register only, offset fits in 9 signed bits */
+                       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                               return 1;
+
+                       arg &= REG_MASK;
+                       argw &= 0x1ff; /* keep the 9 bit two's complement offset */
+                       FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3]
+                               | (shift << 30) | RT(reg) | RN(arg) | (argw << 12)));
+                       return -1;
+               }
+               return 0; /* every other write back form is left to getput_arg */
+       }
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3; /* here argw is the shift applied to the offset register */
+               if (argw && argw != shift) /* only no shift or a shift equal to the access size is handled here */
+                       return 0;
+
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+
+               FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg)
+                       | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)));
+               return -1;
+       }
+
+       arg &= REG_MASK;
+       if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) { /* non-negative, size aligned offset whose scaled value fits in 12 bits */
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+
+               FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
+                       | RT(reg) | RN(arg) | (argw << (10 - shift)))); /* scaled offset goes to bit 10 */
+               return -1;
+       }
+
+       if (argw > 255 || argw < -256) /* does not fit the signed 9 bit form either */
+               return 0;
+
+       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+               return 1;
+
+       FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
+               | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)));
+       return -1;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back.
+   Returns 1 when the address computed for (arg, argw) can be reused for
+   the following operand (next_arg, next_argw), otherwise 0. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw delta;
+       sljit_si in_range;
+
+       /* An offset register rules caching out. */
+       if (arg & OFFS_REG_MASK)
+               return 0;
+       /* So does a following operand that is not in memory. */
+       if (!(next_arg & SLJIT_MEM))
+               return 0;
+
+       delta = argw - next_argw;
+       in_range = (delta >= -0xfff && delta <= 0xfff);
+
+       /* No base register: only the distance of the two offsets matters. */
+       if (!(arg & REG_MASK))
+               return in_range ? 1 : 0;
+
+       /* Same offset: the offset value itself can be shared. */
+       if (argw == next_argw)
+               return 1;
+
+       /* Same operand form and close offsets. */
+       return (arg == next_arg && in_range) ? 1 : 0;
+}
+
+/* Emit the necessary instructions. See can_cache above.
+   Slow path of a memory access: loads reg from, or stores reg to (when flags
+   has STORE), the memory operand (arg, argw) using as many instructions as
+   needed. TMP_REG3 is used as an address cache described by
+   compiler->cache_arg / compiler->cache_argw:
+     cache_arg == SLJIT_MEM          TMP_REG3 holds the value cache_argw
+     cache_arg == SLJIT_MEM | base   TMP_REG3 holds base + cache_argw
+   (next_arg, next_argw) is the operand accessed next; it is only used to
+   decide whether caching the address pays off. Returns the result of the
+   last push_inst, or leaves early through FAIL_IF on failure. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg,
+       sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_ui shift = MEM_SIZE_SHIFT(flags);
+       sljit_si tmp_r, other_r;
+       sljit_sw diff;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       /* A load may build the address in its own destination register;
+          a store must not clobber the register being stored. */
+       tmp_r = (flags & STORE) ? TMP_REG3 : reg;
+
+       if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
+               /* Update only applies if a base register exists. */
+               other_r = OFFS_REG(arg);
+               if (!other_r) {
+                       other_r = arg & REG_MASK;
+                       /* Base + immediate: adjust the base register first (one
+                          or two add/sub immediates), then access [base]. */
+                       if (other_r != reg && argw >= 0 && argw <= 0xffffff) {
+                               if ((argw & 0xfff) != 0)
+                                       FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
+                               if (argw >> 12)
+                                       FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
+                               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
+                       }
+                       else if (other_r != reg && argw < 0 && argw >= -0xffffff) {
+                               argw = -argw;
+                               if ((argw & 0xfff) != 0)
+                                       FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
+                               if (argw >> 12)
+                                       FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
+                               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
+                       }
+
+                       /* Otherwise materialize the offset in TMP_REG3, reusing
+                          a cached constant when possible. */
+                       if (compiler->cache_arg == SLJIT_MEM) {
+                               if (argw == compiler->cache_argw) {
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                               else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                                       FAIL_IF(compiler->error);
+                                       compiler->cache_argw = argw;
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                       }
+
+                       if (argw) {
+                               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               other_r = TMP_REG3;
+                               argw = 0;
+                       }
+               }
+
+               /* No caching here. */
+               arg &= REG_MASK;
+               argw &= 0x3;
+               if (!argw || argw == shift) {
+                       /* Access [base + offset register] directly, then update the base. */
+                       FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0)));
+                       return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10));
+               }
+               if (arg != reg) {
+                       FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)));
+                       return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
+               }
+               /* The base register is also the data register: compute the
+                  address in TMP_REG4, access it, then copy it to the base. */
+               FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG4) | RN(arg) | RM(other_r) | (argw << 10)));
+               FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG4)));
+               return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_REG4));
+       }
+
+       if (arg & OFFS_REG_MASK) {
+               /* Base + shifted offset register that the fast path rejected. */
+               other_r = OFFS_REG(arg);
+               arg &= REG_MASK;
+               FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10)));
+               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r));
+       }
+
+       if (compiler->cache_arg == arg) {
+               /* TMP_REG3 already points cache_argw past the same base. */
+               diff = argw - compiler->cache_argw;
+               if (diff <= 255 && diff >= -256)
+                       return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
+                               | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       /* TMP_REG3 now holds the complete address, so the access
+                          must go through TMP_REG3 (arg still carries the
+                          SLJIT_MEM bit and is not a register index), and the
+                          cache description must follow the moved register. */
+                       compiler->cache_argw = argw;
+                       return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3));
+               }
+       }
+
+       if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
+               /* Add the upper 12 bits of the offset, fold the lower 12 bits
+                  into the scaled immediate of the access itself. */
+               FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10)));
+               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
+                       | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
+       }
+
+       /* From here on next_arg is a boolean: the next operand uses the same
+          base with a close, but different, offset. */
+       diff = argw - next_argw;
+       next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
+       arg &= REG_MASK;
+
+       if (arg && compiler->cache_arg == SLJIT_MEM) {
+               /* TMP_REG3 caches a constant: use it as the offset register. */
+               if (compiler->cache_argw == argw)
+                       return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
+               }
+       }
+
+       compiler->cache_argw = argw;
+       if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
+               /* TMP_REG3 = base + argw in one instruction. */
+               FAIL_IF(compiler->error);
+               compiler->cache_arg = SLJIT_MEM | arg;
+               arg = 0;
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               compiler->cache_arg = SLJIT_MEM;
+
+               if (next_arg) {
+                       /* Fold the base in, so the next operand can reuse it. */
+                       FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg)));
+                       compiler->cache_arg = SLJIT_MEM | arg;
+                       arg = 0;
+               }
+       }
+
+       if (arg)
+               return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
+       return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3));
+}
+
+/* Loads or stores reg through the memory operand (arg, argw): the single
+   instruction form is tried first, otherwise the general sequence is
+   emitted with an empty address cache and no following operand. */
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_si fast_result = getput_arg_fast(compiler, flags, reg, arg, argw);
+
+       /* Non-zero: the fast path dealt with the operand (or failed). */
+       if (fast_result != 0)
+               return compiler->error;
+
+       compiler->cache_argw = 0;
+       compiler->cache_arg = 0;
+       return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+}
+
+/* Same as emit_op_mem, but keeps the current address cache and passes the
+   following operand (arg2, arg2w) on, so the general sequence can prepare a
+   reusable address. */
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       sljit_si fast_result = getput_arg_fast(compiler, flags, reg, arg1, arg1w);
+
+       /* Non-zero: the fast path dealt with the operand (or failed). */
+       if (fast_result != 0)
+               return compiler->error;
+
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{ /* Emits the function entry sequence: records the frame layout in compiler,
+     allocates the frame while storing register 29 and TMP_LR at its bottom,
+     stores the requested saved registers above them and copies up to three
+     arguments from the scratch registers into the saved registers. */
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+       compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw); /* two words for the 29 / TMP_LR pair plus one word per saved register */
+       local_size = (compiler->locals_offset + local_size + 15) & ~15; /* whole frame, rounded up to a multiple of 16 */
+       compiler->local_size = local_size;
+
+       if (local_size <= (64 << 3)) /* the frame size in words fits the 7 bit offset field of STP_PRE */
+               FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
+                       | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
+       else {
+               local_size -= (64 << 3); /* the last 64 words are allocated by the STP_PRE below */
+               if (local_size > 0xfff) {
+                       FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); /* upper part, immediate shifted by 12 */
+                       local_size &= 0xfff;
+               }
+               if (local_size)
+                       FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
+               FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x40 << 15))); /* 0x40 is -64 in the 7 bit field, the same encoding the branch above yields for 64 words */
+       }
+
+       FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_LOCALS_REG) | RN(TMP_SP))); /* SLJIT_LOCALS_REG = TMP_SP + 0 */
+
+       if (saveds >= 2) /* saved registers are stored in pairs from word offset 2; an odd one is stored alone */
+               FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15)));
+       if (saveds >= 4)
+               FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15)));
+       if (saveds == 1)
+               FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10)));
+       if (saveds == 3)
+               FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10)));
+       if (saveds == 5)
+               FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10)));
+
+       if (args >= 1) /* ORR with TMP_ZERO acts as a register move: argument N goes from scratch register N to saved register N */
+               FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1)));
+       if (args >= 2)
+               FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG2) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG2)));
+       if (args >= 3)
+               FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG3) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG3)));
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+       compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw);
+       compiler->local_size = (compiler->locals_offset + local_size + 15) & ~15;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{ /* Emits the function exit sequence: moves the return value into place
+     (emit_mov_before_return), reloads the saved registers, reloads register
+     29 and TMP_LR while releasing the frame, then returns through TMP_LR. */
+       sljit_si saveds, local_size;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       saveds = compiler->saveds;
+
+       if (saveds >= 2) /* same slots and pairing as the stores in sljit_emit_enter */
+               FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15)));
+       if (saveds >= 4)
+               FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15)));
+       if (saveds == 1)
+               FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10)));
+       if (saveds == 3)
+               FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10)));
+       if (saveds == 5)
+               FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10)));
+
+       local_size = compiler->local_size;
+
+       if (local_size <= (62 << 3)) /* the whole frame is released by the offset of LDP_PST */
+               FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
+                       | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
+       else {
+               FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x3e << 15))); /* releases 62 words; the rest is added below */
+               local_size -= (62 << 3);
+               if (local_size > 0xfff) {
+                       FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); /* upper part, immediate shifted by 12 */
+                       local_size &= 0xfff;
+               }
+               if (local_size)
+                       FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
+       }
+
+       FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* Emits an operation without explicit operands. The multiply and divide
+   forms work on SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2 and use TMP_REG1
+   as a temporary. Unknown opcodes emit nothing and report success. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       /* XOR-ed into the divide sequence opcodes when SLJIT_INT_OP is requested. */
+       sljit_ins inv_bits = (op & SLJIT_INT_OP) ? (1 << 31) : 0;
+       sljit_ins high_mul;
+       sljit_ins divide;
+
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       op = GET_OPCODE(op);
+
+       if (op == SLJIT_BREAKPOINT)
+               return push_inst(compiler, BRK);
+
+       if (op == SLJIT_NOP)
+               return push_inst(compiler, NOP);
+
+       if (op == SLJIT_UMUL || op == SLJIT_SMUL) {
+               high_mul = (op == SLJIT_SMUL) ? SMULH : UMULH;
+
+               /* Keep the first operand, since SLJIT_SCRATCH_REG1 receives
+                  the MADD result before the high multiply reads it. */
+               FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1)));
+               FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO)));
+               return push_inst(compiler, high_mul | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2));
+       }
+
+       if (op == SLJIT_UDIV || op == SLJIT_SDIV) {
+               divide = (op == SLJIT_SDIV) ? SDIV : UDIV;
+
+               /* Quotient goes to SLJIT_SCRATCH_REG1; SLJIT_SCRATCH_REG2
+                  becomes dividend - quotient * divisor. */
+               FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1)));
+               FAIL_IF(push_inst(compiler, (divide ^ inv_bits) | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2)));
+               FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO)));
+               return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2));
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a single operand operation: dst = op(src).
+   The move family (SLJIT_MOV .. SLJIT_MOVU_P) selects the access size,
+   signedness and write back mode from the opcode; immediates are truncated
+   or extended to the selected type before being loaded. Every other opcode
+   is forwarded to emit_op_imm, with memory operands going through TMP_REG2
+   (source) and TMP_REG1 (destination).
+   Returns SLJIT_SUCCESS, or the compiler error as soon as any emission
+   step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r, flags, mem_flags;
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       /* Every operation starts with an empty address cache. */
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       /* The result is built in TMP_REG1 unless dst is a register. */
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+
+       op = GET_OPCODE(op);
+       if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+               switch (op) {
+               case SLJIT_MOV:
+               case SLJIT_MOV_P:
+                       flags = WORD_SIZE;
+                       break;
+               case SLJIT_MOV_UB:
+                       flags = BYTE_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOV_SB:
+                       flags = BYTE_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOV_UH:
+                       flags = HALF_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOV_SH:
+                       flags = HALF_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               case SLJIT_MOV_UI:
+                       flags = INT_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ui)srcw;
+                       break;
+               case SLJIT_MOV_SI:
+                       flags = INT_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_si)srcw;
+                       break;
+               case SLJIT_MOVU:
+               case SLJIT_MOVU_P:
+                       flags = WORD_SIZE | UPDATE;
+                       break;
+               case SLJIT_MOVU_UB:
+                       flags = BYTE_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOVU_SB:
+                       flags = BYTE_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOVU_UH:
+                       flags = HALF_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOVU_SH:
+                       flags = HALF_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               case SLJIT_MOVU_UI:
+                       flags = INT_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ui)srcw;
+                       break;
+               case SLJIT_MOVU_SI:
+                       flags = INT_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_si)srcw;
+                       break;
+               default:
+                       SLJIT_ASSERT_STOP();
+                       flags = 0;
+                       break;
+               }
+
+               if (src & SLJIT_IMM)
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
+               else if (src & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
+                               FAIL_IF(compiler->error);
+                       else
+                               FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
+               } else {
+                       /* Register source: a register destination gets a typed
+                          move, a memory destination stores src directly. */
+                       if (dst_r != TMP_REG1)
+                               return emit_op_imm(compiler, op | ((op_flags & SLJIT_INT_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
+                       dst_r = src;
+               }
+
+               if (dst & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
+                               return compiler->error;
+                       else
+                               return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0;
+       mem_flags = WORD_SIZE;
+       if (op_flags & SLJIT_INT_OP) {
+               flags |= INT_OP;
+               mem_flags = INT_SIZE;
+       }
+
+       if (dst == SLJIT_UNUSED)
+               flags |= UNUSED_RETURN;
+
+       if (src & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw))
+                       FAIL_IF(compiler->error);
+               else
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw));
+               src = TMP_REG2;
+       }
+
+       if (src & SLJIT_IMM) {
+               flags |= ARG2_IMM;
+               if (op_flags & SLJIT_INT_OP)
+                       srcw = (sljit_si)srcw;
+       } else
+               srcw = src;
+
+       /* Propagate a failed emission instead of reporting success, as the
+          move path above already does. */
+       FAIL_IF(emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw));
+
+       if (dst & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw))
+                       return compiler->error;
+               else
+                       return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0);
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a two operand operation: dst = src1 op src2.
+   Memory sources are loaded into TMP_REG1 (src1) and TMP_REG2 (src2). When
+   both need the multi instruction path, the load order is chosen with
+   can_cache so that a computed address can be reused by the next access.
+   A memory destination is written from TMP_REG1 after the operation.
+   Returns SLJIT_SUCCESS, or the compiler error as soon as any emission
+   step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, flags, mem_flags;
+
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       /* Every operation starts with an empty address cache. */
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       /* The result is built in TMP_REG1 unless dst is a register. */
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+       mem_flags = WORD_SIZE;
+       if (op & SLJIT_INT_OP) {
+               flags |= INT_OP;
+               mem_flags = INT_SIZE;
+       }
+
+       if (dst == SLJIT_UNUSED)
+               flags |= UNUSED_RETURN;
+
+       /* ARG_TEST: only ask whether the store fits one instruction. */
+       if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw))
+               flags |= SLOW_DEST;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+       }
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               /* Load src2 first when src1 cannot share its address with
+                  src2 but can share it with dst. */
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
+
+       if (src1 & SLJIT_MEM)
+               src1 = TMP_REG1;
+       if (src2 & SLJIT_MEM)
+               src2 = TMP_REG2;
+
+       /* emit_op_imm takes either the immediate value or the register
+          number in the same argument; the ARGx_IMM flags tell them apart. */
+       if (src1 & SLJIT_IMM)
+               flags |= ARG1_IMM;
+       else
+               src1w = src1;
+       if (src2 & SLJIT_IMM)
+               flags |= ARG2_IMM;
+       else
+               src2w = src2;
+
+       /* Propagate a failed emission instead of reporting success. */
+       FAIL_IF(emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w));
+
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Returns the reg_map entry that belongs to the sljit register reg. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       sljit_si machine_index;
+
+       check_sljit_get_register_index(reg);
+       machine_index = reg_map[reg];
+       return machine_index;
+}
+
+/* Returns the index of the floating point register reg; on this target the
+   sljit number is passed through unchanged. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       sljit_si machine_index = reg;
+
+       check_sljit_get_float_register_index(reg);
+       return machine_index;
+}
+
+/* Appends one caller supplied instruction word to the instruction stream.
+   instruction must point to a single sljit_ins value, size must be 4. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       sljit_ins *raw = (sljit_ins*)instruction;
+
+       CHECK_ERROR();
+       check_sljit_emit_op_custom(compiler, instruction, size);
+       SLJIT_ASSERT(size == 4);
+
+       return push_inst(compiler, *raw);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+/* Reports whether floating point instructions may be used. A build time
+   SLJIT_IS_FPU_AVAILABLE definition overrides the default answer. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifndef SLJIT_IS_FPU_AVAILABLE
+       /* Available by default. */
+       return 1;
+#else
+       return SLJIT_IS_FPU_AVAILABLE;
+#endif
+}
+
+static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{ /* Moves the floating point register reg to or from the memory operand
+     (arg, argw): a store when flags has STORE, otherwise a load. Single
+     instruction forms are tried first; otherwise the offset is placed in
+     TMP_REG3, which is tracked by compiler->cache_arg / cache_argw. */
+       sljit_ui shift = MEM_SIZE_SHIFT(flags); /* 0..3, selects the access size */
+       sljit_ins ins_bits = (shift << 30);
+       sljit_si other_r;
+       sljit_sw diff;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (!(flags & STORE))
+               ins_bits |= 1 << 22; /* bit 22 turns the store opcodes used below into loads */
+
+       if (arg & OFFS_REG_MASK) {
+               argw &= 3; /* here argw is the shift applied to the offset register */
+               if (!argw || argw == shift) /* no shift, or a shift equal to the access size: one instruction */
+                       return push_inst(compiler, STR_FR | ins_bits | VT(reg)
+                               | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
+               other_r = OFFS_REG(arg);
+               arg &= REG_MASK;
+               FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10))); /* TMP_REG1 = base + (offset register << argw) */
+               arg = TMP_REG1;
+               argw = 0;
+       }
+
+       arg &= REG_MASK;
+       if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0) /* non-negative, size aligned offset whose scaled value fits in 12 bits */
+               return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift)));
+
+       if (arg && argw <= 255 && argw >= -256) /* signed 9 bit offset */
+               return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
+
+       /* Slow cases */
+       if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) { /* TMP_REG3 caches a different constant: try to reach argw from it */
+               diff = argw - compiler->cache_argw;
+               if (!arg && diff <= 255 && diff >= -256) /* absolute address within reach of the cached value */
+                       return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+               }
+       }
+
+       if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) { /* the cache could not provide argw: load it in full */
+               compiler->cache_arg = SLJIT_MEM;
+               compiler->cache_argw = argw;
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+       }
+
+       if (arg & REG_MASK) /* base register + TMP_REG3 as the offset register */
+               return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3));
+       return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); /* absolute address held in TMP_REG3 */
+}
+
+/* Emits a one-operand floating point operation: compare (SLJIT_CMPD), move (SLJIT_MOVD),
+   negate (SLJIT_NEGD) or absolute value (SLJIT_ABSD).
+
+   op         opcode; SLJIT_SINGLE_OP selects the single precision variant
+   dst/dstw   destination operand (for SLJIT_CMPD: the first compared value)
+   src/srcw   source operand
+
+   Memory operands are moved through TMP_FREG1 / TMP_FREG2.
+   Returns SLJIT_SUCCESS, or an error code as soon as emitting an instruction fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       /* Single precision uses INT_SIZE memory accesses, double precision WORD_SIZE ones. */
+       sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
+       /* XOR-ed into the base opcodes below when the single precision form is requested. */
+       sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+
+       /* Reset the address cache used by emit_fop_mem. */
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       if (GET_OPCODE(op) == SLJIT_CMPD) {
+               /* Both operands are only read: load memory operands into temporaries.
+                  Load failures are propagated instead of being silently ignored. */
+               if (dst & SLJIT_MEM) {
+                       FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, dst, dstw));
+                       dst = TMP_FREG1;
+               }
+               if (src & SLJIT_MEM) {
+                       FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src, srcw));
+                       src = TMP_FREG2;
+               }
+               return push_inst(compiler, (FCMP ^ inv_bits) | VN(dst) | VM(src));
+       }
+
+       /* Compute into dst when it is a register, otherwise into TMP_FREG1. */
+       dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, mem_flags, dst_r, src, srcw));
+               src = dst_r;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOVD:
+               /* A move onto itself needs no instruction. */
+               if (src != dst_r)
+                       FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
+               break;
+       case SLJIT_NEGD:
+               FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
+               break;
+       case SLJIT_ABSD:
+               FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
+               break;
+       }
+
+       if (!(dst & SLJIT_MEM))
+               return SLJIT_SUCCESS;
+       /* dst is memory, so the result was produced in TMP_FREG1: write it back. */
+       return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
+}
+
+/* Emits a two-operand floating point operation: SLJIT_ADDD, SLJIT_SUBD, SLJIT_MULD
+   or SLJIT_DIVD.
+
+   op           opcode; SLJIT_SINGLE_OP selects the single precision variant
+   dst/dstw     destination operand
+   src1/src1w   first source operand
+   src2/src2w   second source operand
+
+   Memory sources are loaded into TMP_FREG1 / TMP_FREG2; a memory destination is
+   computed in TMP_FREG1 and stored afterwards.
+   Returns SLJIT_SUCCESS, or an error code as soon as emitting an instruction fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* Single precision uses INT_SIZE memory accesses, double precision WORD_SIZE ones. */
+       sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
+       /* XOR-ed into the base opcodes below when the single precision form is requested. */
+       sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+       /* Reset the address cache used by emit_fop_mem. */
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+       /* Load failures are propagated instead of being silently ignored. */
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
+               src1 = TMP_FREG1;
+       }
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
+               src2 = TMP_FREG2;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADDD:
+               FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       case SLJIT_SUBD:
+               FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       case SLJIT_MULD:
+               FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       case SLJIT_DIVD:
+               FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       }
+
+       if (!(dst & SLJIT_MEM))
+               return SLJIT_SUCCESS;
+       /* dst is memory, so the result was produced in TMP_FREG1: write it back. */
+       return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+/* Copies TMP_LR into dst (a register or a memory operand); emits nothing when dst is unused. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst != SLJIT_UNUSED) {
+               /* Memory destination: store TMP_LR as a full word. */
+               if (dst > REG_MASK)
+                       return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw);
+
+               /* Register destination: dst = TMP_ZERO | TMP_LR. */
+               return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
+       }
+
+       /* An unused destination is uncommon, but possible. */
+       return SLJIT_SUCCESS;
+}
+
+/* Loads TMP_LR from src (register, memory or immediate) and emits a RET through it. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (src > REG_MASK) {
+               /* Not a plain register: fetch the address from memory or materialize the constant. */
+               if (src & SLJIT_MEM)
+                       FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
+               else if (src & SLJIT_IMM)
+                       FAIL_IF(load_immediate(compiler, TMP_LR, srcw));
+       }
+       else
+               FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
+
+       return push_inst(compiler, RET | RN(TMP_LR));
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+/* Maps an sljit condition type to a 4 bit condition code.
+   Compared with the AArch64 condition encoding (EQ = 0x0, NE = 0x1, ...), each returned
+   value is the logical inverse of the requested condition. */
+static sljit_uw get_cc(sljit_si type)
+{
+       switch (type) {
+       /* 0x0 (EQ) is returned for "not equal" style conditions. */
+       case SLJIT_C_NOT_EQUAL:
+       case SLJIT_C_MUL_OVERFLOW:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               return 0x0;
+
+       /* 0x1 (NE) is returned for "equal" style conditions. */
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+       case SLJIT_C_FLOAT_EQUAL:
+               return 0x1;
+
+       /* Unsigned (and float) orderings. */
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               return 0x2;
+
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               return 0x3;
+
+       /* Overflow flag tests, shared with the float ordered / unordered tests. */
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_FLOAT_ORDERED:
+               return 0x6;
+
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_FLOAT_UNORDERED:
+               return 0x7;
+
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               return 0x8;
+
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               return 0x9;
+
+       /* Signed orderings. */
+       case SLJIT_C_SIG_LESS:
+               return 0xa;
+
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               return 0xb;
+
+       case SLJIT_C_SIG_LESS_EQUAL:
+               return 0xc;
+
+       case SLJIT_C_SIG_GREATER:
+               return 0xd;
+
+       default:
+               /* Unknown condition type. */
+               SLJIT_ASSERT_STOP();
+               return 0xe;
+       }
+}
+
+/* Returns a label for the current code position. The most recent label is reused when
+   no code has been emitted since it was created; otherwise a new one is allocated. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *new_label;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       if (!compiler->last_label || compiler->last_label->size != compiler->size) {
+               new_label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+               PTR_FAIL_IF(!new_label);
+               set_label(new_label, compiler);
+               return new_label;
+       }
+
+       /* Same position as the previous label. */
+       return compiler->last_label;
+}
+
+/* Emits a (conditional) jump or call whose target is set later.
+   The target address is materialized into TMP_REG1 by emit_imm64_const (initially 0)
+   and reached through BR / BLR. Returns the new jump record (PTR_FAIL_IF bails out on failure). */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       /* Keep only the jump type, drop the modifier bits. */
+       type &= 0xff;
+
+       if (type < SLJIT_JUMP) {
+               jump->flags |= IS_COND;
+               /* get_cc() returns the inverted condition; (6 << 5) is presumably the offset that
+                  skips the constant load and the BR below - confirm against emit_imm64_const. */
+               PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type)));
+       }
+       else if (type >= SLJIT_FAST_CALL)
+               jump->flags |= IS_BL;
+
+       /* Placeholder target; jump->addr records the position of the branch that follows it. */
+       PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));
+
+       return jump;
+}
+
+/* Emits a jump taken when src is equal / not equal to zero, using a CBZ based
+   instruction in front of an absolute jump through TMP_REG1.
+   type must be SLJIT_C_EQUAL or SLJIT_C_NOT_EQUAL (optionally with modifier bits).
+   Returns the new jump record (PTR_FAIL_IF bails out on failure). */
+static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+       /* Bit 31 is toggled in the CBZ opcode for SLJIT_INT_OP (32 bit compare, presumably). */
+       sljit_ins inv_bits = (type & SLJIT_INT_OP) ? (1 << 31) : 0;
+
+       SLJIT_ASSERT((type & 0xff) == SLJIT_C_EQUAL || (type & 0xff) == SLJIT_C_NOT_EQUAL);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       jump->flags |= IS_CBZ | IS_COND;
+
+       /* The compared value must be in a register: load memory / immediate sources into TMP_REG1. */
+       if (src & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw));
+               src = TMP_REG1;
+       }
+       else if (src & SLJIT_IMM) {
+               PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+       }
+       SLJIT_ASSERT(FAST_IS_REG(src));
+
+       /* Bit 24 is toggled for SLJIT_C_EQUAL - presumably turning CBZ into CBNZ so that the
+          jump sequence below is skipped when the condition does not hold (TODO confirm). */
+       if ((type & 0xff) == SLJIT_C_EQUAL)
+               inv_bits |= 1 << 24;
+
+       PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
+       /* Placeholder target; jump->addr records the position of the BR that follows it. */
+       PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1)));
+       return jump;
+}
+
+/* Emits an indirect jump or call. A register or memory source is branched to directly
+   (memory is loaded into TMP_REG1 first); an immediate source is recorded as a jump with
+   a fixed target address and goes through a constant loaded into TMP_REG1. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+       sljit_ins branch;
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       /* Calls use BLR, plain jumps use BR. */
+       branch = (type >= SLJIT_FAST_CALL) ? BLR : BR;
+
+       if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
+               jump->u.target = srcw;
+
+               /* Placeholder constant; jump->addr is the position of the branch after it. */
+               FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
+               jump->addr = compiler->size;
+               return push_inst(compiler, branch | RN(TMP_REG1));
+       }
+
+       /* In ARM, we don't need to touch the arguments. */
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw));
+               src = TMP_REG1;
+       }
+       return push_inst(compiler, branch | RN(src));
+}
+
+/* Materializes a condition as a 0 / 1 value with CSINC and either moves it to dst
+   (opcodes below SLJIT_ADD) or combines it with src using the given binary opcode.
+
+   op         move-type opcode, or a binary opcode (optionally with SLJIT_INT_OP / flag bits)
+   dst/dstw   destination operand; nothing is emitted when dst is SLJIT_UNUSED
+   src/srcw   first operand of the binary operation
+   type       condition type, translated by get_cc()
+
+   Returns SLJIT_SUCCESS, or an error code as soon as emitting an instruction fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si dst_r, flags, mem_flags;
+       sljit_ins cc;
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       cc = get_cc(type);
+       /* Work in dst when it is a register, otherwise in TMP_REG1. */
+       dst_r = (dst <= REG_MASK) ? dst : TMP_REG1;
+
+       if (GET_OPCODE(op) < SLJIT_ADD) {
+               /* Move-type opcode: the condition bit itself is the result. */
+               FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
+               if (dst_r != TMP_REG1)
+                       return SLJIT_SUCCESS;
+               return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw);
+       }
+
+       /* Reset the address cache used by emit_op_mem2. */
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+       mem_flags = WORD_SIZE;
+       if (op & SLJIT_INT_OP) {
+               flags |= INT_OP;
+               mem_flags = INT_SIZE;
+       }
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       } else if (src & SLJIT_IMM)
+               flags |= ARG1_IMM;
+
+       /* The condition bit goes to TMP_REG2 and becomes the second operand. */
+       FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
+       /* Propagate a failure here; otherwise it would be reported as success below
+          whenever dst is a register. */
+       FAIL_IF(emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2));
+
+       if (dst_r != TMP_REG1)
+               return SLJIT_SUCCESS;
+       return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+}
+
+/* Emits code that loads init_value into dst through emit_imm64_const and registers
+   a const record for it. Returns the record (PTR_FAIL_IF bails out on failure). */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si dst_r;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       /* Load into dst when it is a register, otherwise into TMP_REG1. */
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
+
+       /* Memory destination: store the loaded value. */
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
+       return const_;
+}
+
+/* Rewrites the 64 bit constant starting at addr so that it holds new_addr, then
+   flushes the cache for the four instruction words starting there. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *first_ins = (sljit_ins*)addr;
+
+       modify_imm64_const(first_ins, new_addr);
+       SLJIT_CACHE_FLUSH(first_ins, first_ins + 4);
+}
+
+/* Rewrites the 64 bit constant starting at addr so that it holds new_constant, then
+   flushes the cache for the four instruction words starting there. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *first_ins = (sljit_ins*)addr;
+
+       modify_imm64_const(first_ins, new_constant);
+       SLJIT_CACHE_FLUSH(first_ins, first_ins + 4);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c
new file mode 100644 (file)
index 0000000..682f964
--- /dev/null
@@ -0,0 +1,2007 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Returns the name of this backend: "ARM-Thumb2" followed by the SLJIT_CPUINFO suffix. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "ARM-Thumb2" SLJIT_CPUINFO;
+}
+
+/* Length of an instruction word. */
+typedef sljit_ui sljit_ins;
+
+/* Last register + 1. */
+/* Indexes of the temporary registers, placed after the public sljit registers. */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
+#define TMP_PC         (SLJIT_NO_REGISTERS + 4)
+
+/* Temporary floating point registers. */
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_FLOAT_REG6 + 1)
+
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
+/* Indexed by sljit register index (including the TMP_* indexes above); the value is the
+   register number placed into instruction fields by the RD3 / RN4 / ... macros. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
+       0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
+};
+
+/* Moves a `bits` wide field of src whose lowest bit is at position `from` to position `to`;
+   every other bit of the result is zero. All arguments are parenthesized so that compound
+   expressions (e.g. a `from` of 12 + 16) yield the intended shift count in both branches. */
+#define COPY_BITS(src, from, to, bits) \
+       (((from) >= (to) ? ((src) >> ((from) - (to))) : ((src) << ((to) - (from)))) & (((1 << (bits)) - 1) << (to)))
+
+/* Thumb16 encodings. */
+/* Each macro places a mapped register number (or an immediate) into an instruction field.
+   Immediate arguments are parenthesized so compound expressions expand safely. */
+#define RD3(rd) (reg_map[rd])
+#define RN3(rn) (reg_map[rn] << 3)
+#define RM3(rm) (reg_map[rm] << 6)
+#define RDN3(rdn) (reg_map[rdn] << 8)
+#define IMM3(imm) ((imm) << 6)
+#define IMM8(imm) (imm)
+
+/* Thumb16 helpers. */
+/* SET_REGS44 encodes two 4 bit register numbers: rn at bits 3-6, rd split into bits 0-2 and bit 7. */
+#define SET_REGS44(rd, rn) \
+       ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4))
+/* True when every listed register maps to a machine register number of at most 7. */
+#define IS_2_LO_REGS(reg1, reg2) \
+       (reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
+#define IS_3_LO_REGS(reg1, reg2, reg3) \
+       (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)
+
+/* Thumb32 encodings. */
+#define RD4(rd) (reg_map[rd] << 8)
+#define RN4(rn) (reg_map[rn] << 16)
+#define RM4(rm) (reg_map[rm])
+#define RT4(rt) (reg_map[rt] << 12)
+/* Float register fields take the register number directly (no reg_map lookup). */
+#define DD4(dd) ((dd) << 12)
+#define DN4(dn) ((dn) << 16)
+#define DM4(dm) (dm)
+/* IMM5: bits 2-4 of imm go to bits 12-14, bits 0-1 go to bits 6-7. */
+#define IMM5(imm) \
+       (COPY_BITS(imm, 2, 12, 3) | (((imm) & 0x3) << 6))
+/* IMM12: bit 11 of imm goes to bit 26, bits 8-10 go to bits 12-14, bits 0-7 stay in place. */
+#define IMM12(imm) \
+       (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((imm) & 0xff))
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* Base opcode values for the Thumb2 instructions emitted by this backend.
+   Naming: dot '.' changed to _
+   I immediate form (possibly followed by number of immediate bits).
+   Values up to 0xffff are 16 bit encodings, larger values are 32 bit encodings. */
+#define ADCI           0xf1400000
+#define ADCS           0x4140
+#define ADC_W          0xeb400000
+#define ADD            0x4400
+#define ADDS           0x1800
+#define ADDSI3         0x1c00
+#define ADDSI8         0x3000
+#define ADD_W          0xeb000000
+#define ADDWI          0xf2000000
+#define ADD_SP         0xb000
+#define ADD_WI         0xf1000000
+#define ANDI           0xf0000000
+#define ANDS           0x4000
+#define AND_W          0xea000000
+#define ASRS           0x4100
+#define ASRSI          0x1000
+#define ASR_W          0xfa40f000
+#define ASR_WI         0xea4f0020
+#define BICI           0xf0200000
+#define BKPT           0xbe00
+#define BLX            0x4780
+#define BX             0x4700
+#define CLZ            0xfab0f080
+#define CMPI           0x2800
+#define CMP_W          0xebb00f00
+#define EORI           0xf0800000
+#define EORS           0x4040
+#define EOR_W          0xea800000
+#define IT             0xbf00
+#define LSLS           0x4080
+#define LSLSI          0x0000
+#define LSL_W          0xfa00f000
+#define LSL_WI         0xea4f0000
+#define LSRS           0x40c0
+#define LSRSI          0x0800
+#define LSR_W          0xfa20f000
+#define LSR_WI         0xea4f0010
+#define MOV            0x4600
+#define MOVS           0x0000
+#define MOVSI          0x2000
+#define MOVT           0xf2c00000
+#define MOVW           0xf2400000
+#define MOV_W          0xea4f0000
+#define MOV_WI         0xf04f0000
+#define MUL            0xfb00f000
+#define MVNS           0x43c0
+#define MVN_W          0xea6f0000
+#define MVN_WI         0xf06f0000
+#define NOP            0xbf00
+#define ORNI           0xf0600000
+#define ORRI           0xf0400000
+#define ORRS           0x4300
+#define ORR_W          0xea400000
+#define POP            0xbd00
+#define POP_W          0xe8bd0000
+#define PUSH           0xb500
+#define PUSH_W         0xe92d0000
+#define RSB_WI         0xf1c00000
+#define RSBSI          0x4240
+#define SBCI           0xf1600000
+#define SBCS           0x4180
+#define SBC_W          0xeb600000
+#define SMULL          0xfb800000
+#define STR_SP         0x9000
+#define SUBS           0x1a00
+#define SUBSI3         0x1e00
+#define SUBSI8         0x3800
+#define SUB_W          0xeba00000
+#define SUBWI          0xf2a00000
+#define SUB_SP         0xb080
+#define SUB_WI         0xf1a00000
+#define SXTB           0xb240
+#define SXTB_W         0xfa4ff080
+#define SXTH           0xb200
+#define SXTH_W         0xfa0ff080
+#define TST            0x4200
+#define UMULL          0xfba00000
+#define UXTB           0xb2c0
+#define UXTB_W         0xfa5ff080
+#define UXTH           0xb280
+#define UXTH_W         0xfa1ff080
+#define VABS_F32       0xeeb00ac0
+#define VADD_F32       0xee300a00
+#define VCMP_F32       0xeeb40a40
+#define VDIV_F32       0xee800a00
+#define VMOV_F32       0xeeb00a40
+#define VMRS           0xeef1fa10
+#define VMUL_F32       0xee200a00
+#define VNEG_F32       0xeeb10a40
+#define VSTR_F32       0xed000a00
+#define VSUB_F32       0xee300a40
+
+/* Appends one 16 bit instruction to the code buffer; compiler->size counts halfwords. */
+static sljit_si push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
+{
+       sljit_uh *slot;
+
+       /* A 16 bit encoding must not have any of the upper 16 bits set. */
+       SLJIT_ASSERT(!(inst & 0xffff0000));
+
+       slot = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh));
+       FAIL_IF(!slot);
+       slot[0] = inst;
+       compiler->size += 1;
+       return SLJIT_SUCCESS;
+}
+
+/* Appends one 32 bit instruction as two halfwords, upper halfword first. */
+static sljit_si push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
+{
+       sljit_uh *slot = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins));
+
+       FAIL_IF(!slot);
+       slot[0] = inst >> 16;
+       slot[1] = inst;
+       compiler->size += 2;
+       return SLJIT_SUCCESS;
+}
+
+/* Loads a 32 bit constant into dst with a fixed MOVW (low 16 bits) + MOVT (high 16 bits)
+   pair; modify_imm32_const asserts this exact instruction pair when patching. */
+static SLJIT_INLINE sljit_si emit_imm32_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
+{
+       /* Immediate fields of the low half: imm4 (bits 16-19), i (bit 26), imm3 (bits 12-14), imm8. */
+       sljit_uw low_fields = COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1)
+               | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff);
+       /* The same fields taken from the upper 16 bits of imm. */
+       sljit_uw high_fields = COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1)
+               | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16);
+
+       FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | low_fields));
+       return push_inst32(compiler, MOVT | RD4(dst) | high_fields);
+}
+
+/* Patches an existing MOVW + MOVT pair (four halfwords starting at inst) so that it loads
+   new_imm. The destination register field is kept. */
+static SLJIT_INLINE void modify_imm32_const(sljit_uh *inst, sljit_uw new_imm)
+{
+       /* Destination register field (bits 8-11 of the second halfword of each instruction). */
+       sljit_si dst = inst[1] & 0x0f00;
+       /* The sequence must be MOVW followed by MOVT targeting the same register. */
+       SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
+       /* MOVW: low 16 bits of new_imm. */
+       inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1);
+       inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff);
+       /* MOVT: high 16 bits of new_imm. */
+       inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1);
+       inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
+}
+
+/* Decides whether a jump can use a shorter, pc-relative branch encoding.
+   On success the matching PATCH_* flag is set in jump->flags and the return value is
+   the number of halfwords by which the caller moves code_ptr back; 0 means the jump
+   is left in its generic form. */
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code)
+{
+       sljit_sw diff;
+
+       /* Rewritable jumps are never shortened. */
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return 0;
+
+       if (jump->flags & JUMP_ADDR) {
+               /* Branch to ARM code is not optimized yet. */
+               if (!(jump->u.target & 0x1))
+                       return 0;
+               /* Distance in halfwords, measured from code_ptr + 2. */
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1;
+       }
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1;
+       }
+
+       if (jump->flags & IS_COND) {
+               SLJIT_ASSERT(!(jump->flags & IS_BL));
+               /* Try the shortest form first. */
+               if (diff <= 127 && diff >= -128) {
+                       jump->flags |= PATCH_TYPE1;
+                       return 5;
+               }
+               if (diff <= 524287 && diff >= -524288) {
+                       jump->flags |= PATCH_TYPE2;
+                       return 4;
+               }
+               /* +1 comes from the prefix IT instruction. */
+               diff--;
+               if (diff <= 8388607 && diff >= -8388608) {
+                       jump->flags |= PATCH_TYPE3;
+                       return 3;
+               }
+       }
+       else if (jump->flags & IS_BL) {
+               if (diff <= 8388607 && diff >= -8388608) {
+                       jump->flags |= PATCH_BL;
+                       return 3;
+               }
+       }
+       else {
+               if (diff <= 1023 && diff >= -1024) {
+                       jump->flags |= PATCH_TYPE4;
+                       return 4;
+               }
+               if (diff <= 8388607 && diff >= -8388608) {
+                       jump->flags |= PATCH_TYPE5;
+                       return 3;
+               }
+       }
+
+       /* Out of range for every short form. */
+       return 0;
+}
+
+/* Writes the final branch encoding of a jump once all addresses are known.
+   The patch type is read from bits 4-7 of jump->flags; type 0 keeps the generic
+   MOVW + MOVT sequence and only patches its constant. */
+static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
+{
+       sljit_si type = (jump->flags >> 4) & 0xf;
+       sljit_sw diff;
+       sljit_uh *jump_inst;
+       sljit_si s, j1, j2;
+
+       if (SLJIT_UNLIKELY(type == 0)) {
+               modify_imm32_const((sljit_uh*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
+               return;
+       }
+
+       /* Distance in halfwords, measured from jump->addr + 4. */
+       if (jump->flags & JUMP_ADDR) {
+               SLJIT_ASSERT(jump->u.target & 0x1);
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1;
+       }
+       else
+               diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1;
+       jump_inst = (sljit_uh*)jump->addr;
+
+       switch (type) {
+       case 1:
+               /* Encoding T1 of 'B' instruction */
+               SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
+               /* Bits 8-11 of jump->flags are copied into the condition field. */
+               jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff);
+               return;
+       case 2:
+               /* Encoding T3 of 'B' instruction */
+               SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
+               jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1);
+               jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff);
+               return;
+       case 3:
+               /* Conditional long form: an IT prefix followed by a type 5 branch. */
+               SLJIT_ASSERT(jump->flags & IS_COND);
+               *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8;
+               /* The branch is one halfword further because of the IT prefix. */
+               diff--;
+               type = 5;
+               break;
+       case 4:
+               /* Encoding T2 of 'B' instruction */
+               SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
+               jump_inst[0] = 0xe000 | (diff & 0x7ff);
+               return;
+       }
+
+       SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);
+
+       /* Really complex instruction form for branches. */
+       /* s is bit 23 of diff; j1 / j2 are bits 21 / 22 of diff, inverted and XOR-ed with s. */
+       s = (diff >> 23) & 0x1;
+       j1 = (~(diff >> 21) ^ s) & 0x1;
+       j2 = (~(diff >> 22) ^ s) & 0x1;
+       jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
+       jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
+
+       /* The others have a common form. */
+       if (type == 5) /* Encoding T4 of 'B' instruction */
+               jump_inst[1] |= 0x9000;
+       else if (type == 6) /* Encoding T1 of 'BL' instruction */
+               jump_inst[1] |= 0xd000;
+       else
+               SLJIT_ASSERT_STOP();
+}
+
+/* Copies the buffered instruction halfwords into executable memory, resolves label,
+   jump and constant addresses, shortens jumps where detect_jump_type allows it and
+   patches every jump. Returns the code address with bit 0 set (thumb mode flag). */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_uh *code;
+       sljit_uh *code_ptr;
+       sljit_uh *buf_ptr;
+       sljit_uh *buf_end;
+       /* Index of the halfword being copied, counted in the unshortened instruction stream. */
+       sljit_uw half_count;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       /* compiler->size is counted in halfwords. */
+       code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       half_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+
+       do {
+               buf_ptr = (sljit_uh*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 1);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       /* These structures are ordered by their address. */
+                       SLJIT_ASSERT(!label || label->size >= half_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= half_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= half_count);
+                       if (label && label->size == half_count) {
+                               /* Label addresses carry the thumb bit; size becomes the halfword offset in code. */
+                               label->addr = ((sljit_uw)code_ptr) | 0x1;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == half_count) {
+                                       /* Point jump->addr 10 (conditional) or 8 bytes before code_ptr, then move
+                                          code_ptr back by the number of halfwords a shorter encoding saves. */
+                                       jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
+                                       code_ptr -= detect_jump_type(jump, code_ptr, code);
+                                       jump = jump->next;
+                       }
+                       if (const_ && const_->addr == half_count) {
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr ++;
+                       half_count ++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       /* A label may be placed right after the last instruction. */
+       if (label && label->size == half_count) {
+               label->addr = ((sljit_uw)code_ptr) | 0x1;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       /* Second pass: all addresses are final, write the branch encodings. */
+       jump = compiler->jumps;
+       while (jump) {
+               set_jump_instruction(jump);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_uh);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       /* Set thumb mode flag. */
+       return (void*)((sljit_uw)code | 0x1);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Core code generator functions.                                       */
+/* --------------------------------------------------------------------- */
+
+/* Returned by get_imm when the value has no immediate encoding. */
+#define INVALID_IMM    0x80000000
+/* Encodes imm as a Thumb immediate operand.
+   Returns the immediate field bits to be OR-ed into an instruction, or INVALID_IMM
+   when imm cannot be represented. */
+static sljit_uw get_imm(sljit_uw imm)
+{
+       /* Thumb immediate form. */
+       sljit_si counter;
+
+       /* Plain 8 bit value. */
+       if (imm <= 0xff)
+               return imm;
+
+       /* Both 16 bit halves are equal: try the byte replication forms. */
+       if ((imm & 0xffff) == (imm >> 16)) {
+               /* Some special cases. */
+               /* 0x00XY00XY */
+               if (!(imm & 0xff00))
+                       return (1 << 12) | (imm & 0xff);
+               /* 0xXY00XY00 */
+               if (!(imm & 0xff))
+                       return (2 << 12) | ((imm >> 8) & 0xff);
+               /* 0xXYXYXYXY */
+               if ((imm & 0xff00) == ((imm & 0xff) << 8))
+                       return (3 << 12) | (imm & 0xff);
+       }
+
+       /* Assembly optimization: count leading zeroes? */
+       /* Shift imm left until its top bit is set; counter ends up as 8 + the number of leading zero bits. */
+       counter = 8;
+       if (!(imm & 0xffff0000)) {
+               counter += 16;
+               imm <<= 16;
+       }
+       if (!(imm & 0xff000000)) {
+               counter += 8;
+               imm <<= 8;
+       }
+       if (!(imm & 0xf0000000)) {
+               counter += 4;
+               imm <<= 4;
+       }
+       if (!(imm & 0xc0000000)) {
+               counter += 2;
+               imm <<= 2;
+       }
+       if (!(imm & 0x80000000)) {
+               counter += 1;
+               imm <<= 1;
+       }
+       /* Since imm > 0xff here (see the early return above), this must be true. */
+       SLJIT_ASSERT(counter <= 31);
+
+       /* Only the top 8 bits may be set after normalization. */
+       if (imm & 0x00ffffff)
+               return INVALID_IMM; /* Cannot be encoded. */
+
+       /* Low 7 bits of the top byte, plus counter split into bit 26, bits 12-14 and bit 7. */
+       return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
+}
+
+/* Loads the constant imm into dst using the shortest available sequence:
+   a single MOVW for 16 bit values, a single MOV / MVN with an encodable immediate,
+   or a MOVW + MOVT pair otherwise. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
+{
+       sljit_uw encoded;
+
+       /* Fits into 16 bits: MOVW sets the low half and clears the high half. */
+       if (imm < 0x10000)
+               return push_inst32(compiler, MOVW | RD4(dst) |
+                       COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff));
+
+       /* Try the value itself, then its complement, as an encodable immediate. */
+       encoded = get_imm(imm);
+       if (encoded != INVALID_IMM)
+               return push_inst32(compiler, MOV_WI | RD4(dst) | encoded);
+       encoded = get_imm(~imm);
+       if (encoded != INVALID_IMM)
+               return push_inst32(compiler, MVN_WI | RD4(dst) | encoded);
+
+       /* General case: low 16 bits with MOVW, high 16 bits with MOVT. */
+       FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
+               COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
+       return push_inst32(compiler, MOVT | RD4(dst) |
+               COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
+}
+
+#define ARG1_IMM       0x0010000 /* emit_op_imm: arg1 holds an immediate value instead of a register. */
+#define ARG2_IMM       0x0020000 /* emit_op_imm: arg2 holds an immediate value instead of a register. */
+#define KEEP_FLAGS     0x0040000 /* emit_op_imm: the 16 bit instruction forms which set the flags are not used. */
+/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
+#define SET_FLAGS      0x0100000
+#define UNUSED_RETURN  0x0200000 /* emit_op_imm: allows the compare / test forms (CMPI, TST) which do not write dst. */
+#define SLOW_DEST      0x0400000 /* NOTE(review): the SLOW_* bits are not read by emit_op_imm; presumably used by its callers - confirm. */
+#define SLOW_SRC1      0x0800000
+#define SLOW_SRC2      0x1000000
+
+/* Emits the instruction(s) of a single operation.
+
+   flags: operation code in the low 16 bits (selected by flags & 0xffff)
+          or-ed with ARG1_IMM, ARG2_IMM, KEEP_FLAGS, SET_FLAGS and
+          UNUSED_RETURN.
+   dst:   destination register.
+   arg1:  first operand, a register or (with ARG1_IMM) an immediate.
+   arg2:  second operand, a register or (with ARG2_IMM) an immediate.
+
+   When both operands are immediates, arg1 is loaded into TMP_REG1 first.
+   If one operand is an immediate, the immediate instruction forms are
+   tried; when none of them fits, the constant is loaded into TMP_REG1
+   (arg1) or TMP_REG2 (arg2) and the register form is emitted.
+
+   Returns the result of the last emitting call. */
+static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_uw arg1, sljit_uw arg2)
+{
+       /* dst must be register, TMP_REG1
+          arg1 must be register, TMP_REG1, imm
+          arg2 must be register, TMP_REG2, imm */
+       sljit_si reg;
+       sljit_uw imm, nimm;
+
+       if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
+               /* Both are immediates. */
+               flags &= ~ARG1_IMM;
+               FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+               arg1 = TMP_REG1;
+       }
+
+       if (flags & (ARG1_IMM | ARG2_IMM)) {
+               /* reg is the register operand, imm is the immediate operand. */
+               reg = (flags & ARG2_IMM) ? arg1 : arg2;
+               imm = (flags & ARG2_IMM) ? arg2 : arg1;
+
+               switch (flags & 0xffff) {
+               case SLJIT_CLZ:
+               case SLJIT_MUL:
+                       /* No form with immediate operand. */
+                       break;
+               case SLJIT_MOV:
+                       SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
+                       return load_immediate(compiler, dst, imm);
+               case SLJIT_NOT:
+                       if (!(flags & SET_FLAGS))
+                               return load_immediate(compiler, dst, ~imm);
+                       /* Since the flags must be set, fall back to the register form.
+                          Some clever things could be done here, but "NOT IMM" is not worth the effort. */
+                       break;
+               case SLJIT_ADD:
+                       /* nimm is the negated constant: adding imm equals subtracting nimm. */
+                       nimm = -imm;
+                       if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+                               if (imm <= 0x7)
+                                       return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
+                               if (nimm <= 0x7)
+                                       return push_inst16(compiler, SUBSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
+                               if (reg == dst) {
+                                       if (imm <= 0xff)
+                                               return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
+                                       if (nimm <= 0xff)
+                                               return push_inst16(compiler, SUBSI8 | IMM8(nimm) | RDN3(dst));
+                               }
+                       }
+                       if (!(flags & SET_FLAGS)) {
+                               if (imm <= 0xfff)
+                                       return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
+                               if (nimm <= 0xfff)
+                                       return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm));
+                       }
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_ADDC:
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_SUB:
+                       if (flags & ARG1_IMM) {
+                               /* imm - reg: reverse subtract. */
+                               if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst))
+                                       return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
+                               imm = get_imm(imm);
+                               if (imm != INVALID_IMM)
+                                       return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                               break;
+                       }
+                       /* nimm is the negated constant: subtracting imm equals adding nimm. */
+                       nimm = -imm;
+                       if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+                               if (imm <= 0x7)
+                                       return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
+                               if (nimm <= 0x7)
+                                       return push_inst16(compiler, ADDSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
+                               if (reg == dst) {
+                                       if (imm <= 0xff)
+                                               return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
+                                       if (nimm <= 0xff)
+                                               return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst));
+                               }
+                               if (imm <= 0xff && (flags & UNUSED_RETURN))
+                                       return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
+                       }
+                       if (!(flags & SET_FLAGS)) {
+                               if (imm <= 0xfff)
+                                       return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
+                               if (nimm <= 0xfff)
+                                       return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm));
+                       }
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_SUBC:
+                       if (flags & ARG1_IMM)
+                               break;
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_AND:
+                       nimm = get_imm(imm);
+                       if (nimm != INVALID_IMM)
+                               return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+                       /* x & imm is the same as clearing the bits of ~imm, so BIC
+                          can be used when only the complement is encodable. */
+                       imm = get_imm(~imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_OR:
+                       nimm = get_imm(imm);
+                       if (nimm != INVALID_IMM)
+                               return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+                       /* x | imm is the same as x | ~(~imm), so ORN can be used
+                          when only the complement is encodable. */
+                       imm = get_imm(~imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_XOR:
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_SHL:
+               case SLJIT_LSHR:
+               case SLJIT_ASHR:
+                       if (flags & ARG1_IMM)
+                               break;
+                       imm &= 0x1f;
+                       if (imm == 0) {
+                               /* Shift by zero: a plain register move (setting the flags if requested). */
+                               if (!(flags & SET_FLAGS))
+                                       return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
+                               if (IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
+                               return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
+                       }
+                       switch (flags & 0xffff) {
+                       case SLJIT_SHL:
+                               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
+                               return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
+                       case SLJIT_LSHR:
+                               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
+                               return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
+                       default: /* SLJIT_ASHR */
+                               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
+                               return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
+                       }
+               default:
+                       SLJIT_ASSERT_STOP();
+                       break;
+               }
+
+               /* No immediate form was usable: load the constant into a temporary register. */
+               if (flags & ARG2_IMM) {
+                       FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
+                       arg2 = TMP_REG2;
+               }
+               else {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+                       arg1 = TMP_REG1;
+               }
+       }
+
+       /* Both arguments are registers. */
+       switch (flags & 0xffff) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_UI:
+       case SLJIT_MOVU_SI:
+       case SLJIT_MOVU_P:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
+       case SLJIT_MOV_UB:
+       case SLJIT_MOVU_UB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
+       case SLJIT_MOV_SB:
+       case SLJIT_MOVU_SB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
+       case SLJIT_MOV_UH:
+       case SLJIT_MOVU_UH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
+       case SLJIT_MOV_SH:
+       case SLJIT_MOVU_SH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
+       case SLJIT_NOT:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
+               if (flags & SET_FLAGS) {
+                       /* A second instruction on the result (immediate field
+                          left zero) is emitted to update the flags. */
+                       if (reg_map[dst] <= 7)
+                               return push_inst16(compiler, CMPI | RDN3(dst));
+                       return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst));
+               }
+               return SLJIT_SUCCESS;
+       case SLJIT_ADD:
+               if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+                       return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
+               if (dst == arg1 && !(flags & SET_FLAGS))
+                       return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
+               return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_ADDC:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_SUB:
+               if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+                       return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
+               return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_SUBC:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_MUL:
+               if (!(flags & SET_FLAGS))
+                       return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
+               SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2);
+               FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
+               /* cmp TMP_REG2, dst asr #31. */
+               return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
+       case SLJIT_AND:
+               if (!(flags & KEEP_FLAGS)) {
+                       if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
+                               return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
+                       if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
+                               return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
+               }
+               return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_OR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_XOR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_SHL:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_LSHR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_ASHR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+#define STORE          0x01 /* Set for stores, clear for loads. */
+#define SIGNED         0x02 /* Selects the sign extending (ldrsb / ldrsh) table entries. */
+
+#define WORD_SIZE      0x00
+#define BYTE_SIZE      0x04
+#define HALF_SIZE      0x08
+
+#define UPDATE         0x10 /* The base register is updated by the access (write back). */
+#define ARG_TEST       0x20 /* getput_arg_fast only reports whether it could handle the access, without emitting. */
+
+#define IS_WORD_SIZE(flags)            (!(flags & (BYTE_SIZE | HALF_SIZE)))
+#define OFFSET_CHECK(imm, shift)       (!(argw & ~(imm << shift))) /* Reads the argw variable of the enclosing function: true if argw has no bits set outside (imm << shift). */
+
+/*
+  The sljit_mem16, sljit_mem16_imm5 and sljit_mem32 tables are indexed by
+  the or-ed size (WORD_SIZE / BYTE_SIZE / HALF_SIZE), SIGNED and STORE
+  flags. Each entry is labelled with three letters:
+
+  1st letter:
+  w = word
+  b = byte
+  h = half
+
+  2nd letter:
+  s = signed
+  u = unsigned
+
+  3rd letter:
+  l = load
+  s = store
+*/
+/* 16 bit encodings of the base register + offset register form. */
+static SLJIT_CONST sljit_ins sljit_mem16[12] = {
+/* w u l */ 0x5800 /* ldr */,
+/* w u s */ 0x5000 /* str */,
+/* w s l */ 0x5800 /* ldr */,
+/* w s s */ 0x5000 /* str */,
+
+/* b u l */ 0x5c00 /* ldrb */,
+/* b u s */ 0x5400 /* strb */,
+/* b s l */ 0x5600 /* ldrsb */,
+/* b s s */ 0x5400 /* strb */,
+
+/* h u l */ 0x5a00 /* ldrh */,
+/* h u s */ 0x5200 /* strh */,
+/* h s l */ 0x5e00 /* ldrsh */,
+/* h s s */ 0x5200 /* strh */,
+};
+/*
+  16 bit encodings of the base register + 5 bit immediate offset form.
+  A zero entry marks a combination without such an encoding;
+  getput_arg_fast tests the entry before using it.
+*/
+static SLJIT_CONST sljit_ins sljit_mem16_imm5[12] = {
+/* w u l */ 0x6800 /* ldr imm5 */,
+/* w u s */ 0x6000 /* str imm5 */,
+/* w s l */ 0x6800 /* ldr imm5 */,
+/* w s s */ 0x6000 /* str imm5 */,
+
+/* b u l */ 0x7800 /* ldrb imm5 */,
+/* b u s */ 0x7000 /* strb imm5 */,
+/* b s l */ 0x0000 /* not allowed */,
+/* b s s */ 0x7000 /* strb imm5 */,
+
+/* h u l */ 0x8800 /* ldrh imm5 */,
+/* h u s */ 0x8000 /* strh imm5 */,
+/* h s l */ 0x0000 /* not allowed */,
+/* h s s */ 0x8000 /* strh imm5 */,
+};
+/* 32 bit encodings. Without MEM_IMM8 / MEM_IMM12 the offset register form is emitted. */
+#define MEM_IMM8       0xc00 /* Or-ed in for the 8 bit immediate offset form. */
+#define MEM_IMM12      0x800000 /* Or-ed in for the 12 bit immediate offset form. */
+static SLJIT_CONST sljit_ins sljit_mem32[12] = {
+/* w u l */ 0xf8500000 /* ldr.w */,
+/* w u s */ 0xf8400000 /* str.w */,
+/* w s l */ 0xf8500000 /* ldr.w */,
+/* w s s */ 0xf8400000 /* str.w */,
+
+/* b u l */ 0xf8100000 /* ldrb.w */,
+/* b u s */ 0xf8000000 /* strb.w */,
+/* b s l */ 0xf9100000 /* ldrsb.w */,
+/* b s s */ 0xf8000000 /* strb.w */,
+
+/* h u l */ 0xf8300000 /* ldrh.w */,
+/* h u s */ 0xf8200000 /* strh.w */,
+/* h s l */ 0xf9300000 /* ldrsh.w */,
+/* h s s */ 0xf8200000 /* strh.w */,
+};
+
+/* Helper function: emits dst = reg + value using exactly one instruction
+   which does not set the flags.
+
+   Returns the result of push_inst32 when such an instruction exists, or
+   SLJIT_ERR_UNSUPPORTED (nothing is emitted) when value can be encoded
+   neither as a 12 bit constant nor by get_imm. */
+static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
+{
+       sljit_uw magnitude;
+       sljit_uw encoded;
+
+       if (value >= 0) {
+               magnitude = (sljit_uw)value;
+               if (magnitude <= 0xfff)
+                       return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(magnitude));
+               encoded = get_imm(magnitude);
+               if (encoded != INVALID_IMM)
+                       return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | encoded);
+               return SLJIT_ERR_UNSUPPORTED;
+       }
+
+       /* The negation is done in unsigned arithmetic: -value overflows for
+          the most negative sljit_sw, which would then pass the <= 0xfff
+          test below as a negative number. */
+       magnitude = (sljit_uw)0 - (sljit_uw)value;
+       if (magnitude <= 0xfff)
+               return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(magnitude));
+       encoded = get_imm(magnitude);
+       if (encoded != INVALID_IMM)
+               return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | encoded);
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Can perform an operation using at most 1 instruction.
+   Return value:
+     0  - the access does not fit any of the single instruction forms
+          handled here, nothing was emitted;
+     1  - ARG_TEST was set and the access fits, nothing was emitted;
+     -1 - the instruction was pushed.
+   NOTE(review): FAIL_IF presumably returns early with an error code when
+   pushing fails - confirm against its definition. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_si other_r, shift;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (SLJIT_UNLIKELY(flags & UPDATE)) { /* Write back form: only a base register with an offset in -0xff..0xff and no offset register is handled. */
+               if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) {
+                       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                               return 1;
+
+                       flags &= ~UPDATE;
+                       arg &= 0xf;
+                       if (argw >= 0)
+                               argw |= 0x200; /* presumably the "add offset" bit of the imm8 form - confirm */
+                       else {
+                               argw = -argw;
+                       }
+
+                       SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff); /* NOTE(review): the second condition is always true. */
+                       FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw)); /* 0x100 is presumably the write back bit - confirm */
+                       return -1;
+               }
+               return 0;
+       }
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { /* Base register + offset register. */
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+
+               argw &= 0x3; /* Only the low two bits of argw are used; the 32 bit form places them at bit 4. */
+               other_r = OFFS_REG(arg);
+               arg &= 0xf;
+
+               if (!argw && IS_3_LO_REGS(reg, arg, other_r))
+                       FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
+               else
+                       FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
+               return -1;
+       }
+
+       if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff) /* No base register, or the offset is outside -0xff..0xfff. */
+               return 0;
+
+       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+               return 1;
+
+       arg &= 0xf;
+       if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) { /* Try the 16 bit imm5 form; shift stays 3 when the offset does not fit. */
+               shift = 3;
+               if (IS_WORD_SIZE(flags)) {
+                       if (OFFSET_CHECK(0x1f, 2))
+                               shift = 2;
+               }
+               else if (flags & BYTE_SIZE)
+               {
+                       if (OFFSET_CHECK(0x1f, 0))
+                               shift = 0;
+               }
+               else {
+                       SLJIT_ASSERT(flags & HALF_SIZE);
+                       if (OFFSET_CHECK(0x1f, 1))
+                               shift = 1;
+               }
+
+               if (shift != 3) {
+                       FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift)))); /* Equals (argw >> shift) << 6. */
+                       return -1;
+               }
+       }
+
+       /* SP based immediate. */
+       if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
+               FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2))); /* 0x800 is or-ed in for loads. */
+               return -1;
+       }
+
+       if (argw >= 0) /* 0..0xfff: 12 bit form; -0xff..-1: 8 bit form with the negated offset. */
+               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
+       else
+               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
+       return -1;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back.
+
+   Returns 1 when the address computed for (arg, argw) is also usable
+   for (next_arg, next_argw), and 0 otherwise. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw delta;
+       sljit_si delta_in_range;
+
+       /* The next operand must be a memory operand, and offset register
+          based addresses are never cached. */
+       if (!(next_arg & SLJIT_MEM) || (arg & OFFS_REG_MASK))
+               return 0;
+
+       delta = argw - next_argw;
+       delta_in_range = (delta >= -0xfff && delta <= 0xfff);
+
+       /* No base register: only the distance of the two offsets matters. */
+       if (!(arg & REG_MASK))
+               return delta_in_range ? 1 : 0;
+
+       /* The same offset can be reused with any base register. */
+       if (argw == next_argw)
+               return 1;
+
+       /* Different offsets need the same base register. */
+       return (delta_in_range && arg == next_arg) ? 1 : 0;
+}
+
+/* Emit the necessary instructions. See can_cache above.
+
+   Handles the memory accesses which getput_arg_fast could not emit.
+   TMP_REG3 holds the computed address or offset, and its content is
+   recorded in the compiler structure:
+     cache_arg == SLJIT_MEM          TMP_REG3 holds the constant cache_argw
+     cache_arg == SLJIT_MEM | base   TMP_REG3 holds base + cache_argw
+
+   flags:     memory access flags (index of the opcode tables, plus UPDATE).
+   reg:       register which is loaded or stored.
+   arg, argw: the memory operand.
+   next_arg, next_argw: the memory operand which follows; ignored unless
+              next_arg has SLJIT_MEM set. Used to decide which value is
+              worth keeping in TMP_REG3.
+
+   Returns the result of the last emitting call. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg,
+       sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si other_r;
+       sljit_sw diff;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
+               /* Update only applies if a base register exists. */
+               /* There is no caching here. */
+               other_r = OFFS_REG(arg);
+               arg &= 0xf;
+               flags &= ~UPDATE;
+
+               if (!other_r) {
+                       if (!(argw & ~0xfff)) {
+                               /* Access with a 12 bit offset, then add the offset to the base register. */
+                               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
+                               return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
+                       }
+
+                       /* Try to get the offset into TMP_REG3 from a cached constant. */
+                       if (compiler->cache_arg == SLJIT_MEM) {
+                               if (argw == compiler->cache_argw) {
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                               else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                                       FAIL_IF(compiler->error);
+                                       compiler->cache_argw = argw;
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                       }
+
+                       /* argw is still non-zero if the cache could not be used. */
+                       if (argw) {
+                               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               other_r = TMP_REG3;
+                               argw = 0;
+                       }
+               }
+
+               /* Access with an offset register, then add it to the base register. */
+               argw &= 0x3;
+               if (!argw && IS_3_LO_REGS(reg, arg, other_r)) {
+                       FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
+                       return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
+               }
+               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
+               return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6));
+       }
+       flags &= ~UPDATE;
+
+       SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
+
+       /* TMP_REG3 already holds an address computed from the same base. */
+       if (compiler->cache_arg == arg) {
+               diff = argw - compiler->cache_argw;
+               if (!(diff & ~0xfff))
+                       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff);
+               if (!((compiler->cache_argw - argw) & ~0xff))
+                       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
+               }
+       }
+
+       /* From here next_arg is a boolean: the next operand uses the same
+          base register with a different offset. */
+       next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw);
+       arg &= 0xf;
+       /* TMP_REG3 holds a constant: use it as the offset register. */
+       if (arg && compiler->cache_arg == SLJIT_MEM) {
+               if (compiler->cache_argw == argw)
+                       return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
+               }
+       }
+
+       /* Nothing reusable: compute a new value into TMP_REG3. */
+       compiler->cache_argw = argw;
+       if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
+               /* TMP_REG3 = base + argw, computed by a single instruction. */
+               FAIL_IF(compiler->error);
+               compiler->cache_arg = SLJIT_MEM | arg;
+               arg = 0;
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               compiler->cache_arg = SLJIT_MEM;
+
+               diff = argw - next_argw;
+               if (next_arg && diff <= 0xfff && diff >= -0xfff) {
+                       /* The next access is reachable from base + argw, so keep the full address. */
+                       FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
+                       compiler->cache_arg = SLJIT_MEM | arg;
+                       arg = 0;
+               }
+       }
+
+       /* arg is zero when TMP_REG3 holds the complete address. */
+       if (arg)
+               return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
+       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
+}
+
+/* Emits a single memory access without any knowledge about the operand
+   which follows: the single instruction forms are tried first, then
+   getput_arg is called with the cache record cleared. */
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg, argw)) {
+               /* The content of TMP_REG3 is not relied on here. */
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+               return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+       }
+
+       return compiler->error;
+}
+
+/* Emits the memory access of (arg1, arg1w). The operand which follows,
+   (arg2, arg2w), is passed to getput_arg, and the cache record of the
+   compiler is left as it is. */
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+
+       return compiler->error;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Emits the function entry sequence: pushes the registers selected by `saveds`
+   and `scratches`, reserves the local area on the stack, and copies up to
+   three arguments from the first scratch registers into the saved registers.
+   Also records scratches, saveds and the adjusted local_size in the compiler.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si size;
+       sljit_ins push;
+
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* Build the register list: bit 4 is always included, one more bit is
+          added per saved register in use, and bit 5 is added when at least
+          five scratch registers are used. */
+       push = (1 << 4);
+       if (saveds >= 5)
+               push |= 1 << 11;
+       if (saveds >= 4)
+               push |= 1 << 10;
+       if (saveds >= 3)
+               push |= 1 << 8;
+       if (saveds >= 2)
+               push |= 1 << 7;
+       if (saveds >= 1)
+               push |= 1 << 6;
+        if (scratches >= 5)
+               push |= 1 << 5;
+       /* With three or more saved registers the list contains bits above 7, so
+          the 32-bit PUSH_W form (with bit 14 added) is emitted; otherwise the
+          16-bit PUSH is used. */
+       FAIL_IF(saveds >= 3
+               ? push_inst32(compiler, PUSH_W | (1 << 14) | push)
+               : push_inst16(compiler, PUSH | push));
+
+       /* Stack must be aligned to 8 bytes: */
+       /* local_size is rounded up so that the assumed register save area of
+          (3 + saveds) words plus the locals is a multiple of 8.
+          NOTE(review): bit 5 is only pushed when scratches >= 5, while the
+          size here always counts 3 + saveds words - confirm the alignment
+          holds in both cases. */
+       size = (3 + saveds) * sizeof(sljit_uw);
+       local_size += size;
+       local_size = (local_size + 7) & ~7;
+       local_size -= size;
+       compiler->local_size = local_size;
+       if (local_size > 0) {
+               /* Up to 127 words fit the immediate of the 16-bit SUB_SP form;
+                  larger frames go through the generic immediate emitter. */
+               if (local_size <= (127 << 2))
+                       FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
+               else
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size));
+       }
+
+       /* Copy the incoming arguments into the saved registers. */
+       if (args >= 1)
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_SCRATCH_REG1)));
+       if (args >= 2)
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_SCRATCH_REG2)));
+       if (args >= 3)
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_SCRATCH_REG3)));
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si size;
+
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       size = (3 + saveds) * sizeof(sljit_uw);
+       local_size += size;
+       local_size = (local_size + 7) & ~7;
+       local_size -= size;
+       compiler->local_size = local_size;
+}
+
+/* Emits the function exit sequence: moves the return value into place (via
+   emit_mov_before_return), releases the local stack area recorded in the
+   compiler and pops the register list that mirrors the one built by
+   sljit_emit_enter.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_ins pop;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       if (compiler->local_size > 0) {
+               /* Up to 127 words fit the immediate of the 16-bit ADD_SP form. */
+               if (compiler->local_size <= (127 << 2))
+                       FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
+               else
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size));
+       }
+
+       /* Same register list as in the entry sequence: bit 4 always, one bit
+          per saved register in use, bit 5 with five or more scratches. */
+       pop = (1 << 4);
+       if (compiler->saveds >= 5)
+               pop |= 1 << 11;
+       if (compiler->saveds >= 4)
+               pop |= 1 << 10;
+       if (compiler->saveds >= 3)
+               pop |= 1 << 8;
+       if (compiler->saveds >= 2)
+               pop |= 1 << 7;
+       if (compiler->saveds >= 1)
+               pop |= 1 << 6;
+        if (compiler->scratches >= 5)
+               pop |= 1 << 5;
+       /* Register bits above 7 require the 32-bit POP_W form (with bit 15
+          added to the list); otherwise the 16-bit POP is used. */
+       return compiler->saveds >= 3
+               ? push_inst32(compiler, POP_W | (1 << 15) | pop)
+               : push_inst16(compiler, POP | pop);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* Software integer divide-with-remainder helpers. Their addresses are used as
+   call targets by the SLJIT_UDIV / SLJIT_SDIV cases of sljit_emit_op0.
+   They are only declared for GNU C compatible compilers; any other toolchain
+   stops at the #error below. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
+extern int __aeabi_idivmod(int numerator, int denominator);
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Emits an operation that takes no explicit operands: breakpoint, nop,
+   widening multiply, or divide. Multiply and divide work on the first two
+   scratch registers.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails.
+   Opcodes not handled by the switch emit nothing and return SLJIT_SUCCESS. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               return push_inst16(compiler, BKPT);
+       case SLJIT_NOP:
+               return push_inst16(compiler, NOP);
+       case SLJIT_UMUL:
+       case SLJIT_SMUL:
+               /* UMULL / SMULL with both inputs and both outputs placed in
+                  SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2. */
+               return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_SCRATCH_REG2] << 8)
+                       | (reg_map[SLJIT_SCRATCH_REG1] << 12)
+                       | (reg_map[SLJIT_SCRATCH_REG1] << 16)
+                       | reg_map[SLJIT_SCRATCH_REG2]);
+       case SLJIT_UDIV:
+       case SLJIT_SDIV:
+               /* Division is done by calling a software helper. Scratch
+                  registers beyond the first two that are in use are saved
+                  around the call: r2 and ip with four or more scratches, r2
+                  alone with three (the 8-byte step presumably keeps the stack
+                  8-byte aligned - confirm). */
+               if (compiler->scratches >= 4) {
+                       FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */));
+                       FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */));
+               } else if (compiler->scratches >= 3)
+                       FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */));
+#if defined(__GNUC__)
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+                       (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+               /* Restore the saved registers in reverse order. */
+               if (compiler->scratches >= 4) {
+                       FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */));
+                       return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */);
+               } else if (compiler->scratches >= 3)
+                       return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */);
+               return SLJIT_SUCCESS;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r, flags;
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+
+       op = GET_OPCODE(op);
+       if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+               switch (op) {
+               case SLJIT_MOV:
+               case SLJIT_MOV_UI:
+               case SLJIT_MOV_SI:
+               case SLJIT_MOV_P:
+                       flags = WORD_SIZE;
+                       break;
+               case SLJIT_MOV_UB:
+                       flags = BYTE_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOV_SB:
+                       flags = BYTE_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOV_UH:
+                       flags = HALF_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOV_SH:
+                       flags = HALF_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               case SLJIT_MOVU:
+               case SLJIT_MOVU_UI:
+               case SLJIT_MOVU_SI:
+               case SLJIT_MOVU_P:
+                       flags = WORD_SIZE | UPDATE;
+                       break;
+               case SLJIT_MOVU_UB:
+                       flags = BYTE_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOVU_SB:
+                       flags = BYTE_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOVU_UH:
+                       flags = HALF_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOVU_SH:
+                       flags = HALF_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               default:
+                       SLJIT_ASSERT_STOP();
+                       flags = 0;
+                       break;
+               }
+
+               if (src & SLJIT_IMM)
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
+               else if (src & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
+                               FAIL_IF(compiler->error);
+                       else
+                               FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
+               } else {
+                       if (dst_r != TMP_REG1)
+                               return emit_op_imm(compiler, op, dst_r, TMP_REG1, src);
+                       dst_r = src;
+               }
+
+               if (dst & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
+                               return compiler->error;
+                       else
+                               return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (op == SLJIT_NEG) {
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+               compiler->skip_checks = 1;
+#endif
+               return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
+       }
+
+       flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
+       if (src & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
+                       FAIL_IF(compiler->error);
+               else
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
+               src = TMP_REG2;
+       }
+
+       if (src & SLJIT_IMM)
+               flags |= ARG2_IMM;
+       else
+               srcw = src;
+
+       emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
+
+       if (dst & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
+                       return compiler->error;
+               else
+                       return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a two-source operation: dst = src1 <op> src2.
+   Memory sources are loaded into TMP_REG1 (src1) and TMP_REG2 (src2), using
+   the fast single-instruction path where possible; the order of the slow
+   loads is chosen by can_cache. The result is computed into dst when it is a
+   register, otherwise into TMP_REG1 and stored afterwards.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, flags;
+
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
+
+       /* ARG_TEST only asks whether the destination store has a fast form. */
+       if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
+               flags |= SLOW_DEST;
+
+       /* Try the fast load for each memory source and remember the ones that
+          need the slow path. */
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+       }
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+       }
+
+       /* Slow loads: each call is told which operand is accessed next. */
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
+
+       if (src1 & SLJIT_MEM)
+               src1 = TMP_REG1;
+       if (src2 & SLJIT_MEM)
+               src2 = TMP_REG2;
+
+       /* Each operand is passed either as an immediate value or as a register
+          number in the corresponding argument. */
+       if (src1 & SLJIT_IMM)
+               flags |= ARG1_IMM;
+       else
+               src1w = src1;
+       if (src2 & SLJIT_IMM)
+               flags |= ARG2_IMM;
+       else
+               src2w = src2;
+
+       if (dst == SLJIT_UNUSED)
+               flags |= UNUSED_RETURN;
+
+       /* Propagate an emit failure instead of reporting success for it. */
+       FAIL_IF(emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w));
+
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Returns the reg_map entry for the given sljit register. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       sljit_si machine_index;
+
+       check_sljit_get_register_index(reg);
+       machine_index = reg_map[reg];
+       return machine_index;
+}
+
+/* Returns the index of the given sljit floating point register. No mapping
+   table is involved: the sljit number is returned unchanged. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       check_sljit_get_float_register_index(reg);
+       return reg;
+}
+
+/* Appends a caller-supplied raw instruction to the instruction stream.
+   `instruction` points to the encoded instruction and `size` is its length in
+   bytes, which must be 2 (16-bit form) or 4 (32-bit form).
+   Returns the result of the underlying push. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op_custom(compiler, instruction, size);
+       SLJIT_ASSERT(size == 2 || size == 4);
+
+       return (size == 2)
+               ? push_inst16(compiler, *(sljit_uh*)instruction)
+               : push_inst32(compiler, *(sljit_ins*)instruction);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+/* Reports whether floating point operations may be used. The build can
+   override the answer by defining SLJIT_IS_FPU_AVAILABLE; without an override
+   the FPU is reported as available. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifndef SLJIT_IS_FPU_AVAILABLE
+       /* No build-time override: available by default. */
+       return 1;
+#else
+       return SLJIT_IS_FPU_AVAILABLE;
+#endif
+}
+
+#define FPU_LOAD (1 << 20)
+
+/* Emits a floating point load or store of register `reg` at the memory operand
+   arg/argw. The instruction is built from VSTR_F32 plus the SLJIT_SINGLE_OP
+   and FPU_LOAD bits of `flags`. Offsets that do not fit the instruction are
+   handled by computing an address into TMP_REG1 or TMP_REG3; TMP_REG3 acts as
+   a cached base described by compiler->cache_arg / cache_argw.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_sw tmp;
+       sljit_uw imm;
+       sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD));
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* Fast loads and stores. */
+       /* Base + (index << shift): form the address in TMP_REG2 first, then
+          continue as a plain [TMP_REG2] access. */
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
+               arg = SLJIT_MEM | TMP_REG2;
+               argw = 0;
+       }
+
+       /* Word-aligned offsets with magnitude up to 0x3fc are encoded directly
+          (as offset / 4); 0x800000 selects a positive offset. */
+       if ((arg & REG_MASK) && (argw & 0x3) == 0) {
+               if (!(argw & ~0x3fc))
+                       return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2));
+               if (!(-argw & ~0x3fc))
+                       return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
+       }
+
+       /* Slow cases */
+       SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
+       /* Same operand as the cached one: address it relative to TMP_REG3, or
+          move TMP_REG3 by the difference when emit_set_delta supports it. */
+       if (compiler->cache_arg == arg) {
+               tmp = argw - compiler->cache_argw;
+               if (!(tmp & ~0x3fc))
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
+               if (!(-tmp & ~0x3fc))
+                       return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
+               }
+       }
+
+       if (arg & REG_MASK) {
+               /* Try base + argw into TMP_REG1 in one step. */
+               if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
+               }
+               /* Otherwise split the offset: the part outside 0x3fc is added
+                  to (or subtracted from) the base when it is encodable as an
+                  immediate, and the rest goes into the instruction. */
+               imm = get_imm(argw & ~0x3fc);
+               if (imm != INVALID_IMM) {
+                       FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
+               }
+               imm = get_imm(-argw & ~0x3fc);
+               if (imm != INVALID_IMM) {
+                       argw = -argw;
+                       FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
+                       return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
+               }
+       }
+
+       /* Last resort: build the full address in TMP_REG3 and record it as the
+          cached operand. */
+       compiler->cache_arg = arg;
+       compiler->cache_argw = argw;
+
+       FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+       if (arg & REG_MASK)
+               FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK))));
+       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
+}
+
+/* Emits a single-source floating point operation: compare (SLJIT_CMPD), move,
+   negate or absolute value. Memory operands are loaded into TMP_FREG1 /
+   TMP_FREG2 (or straight into the destination register) and a memory
+   destination is written from TMP_FREG1.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       /* Invert the single precision bit: from here on (op & SLJIT_SINGLE_OP)
+          is set for double precision and is OR-ed into the *_F32 opcodes. */
+       op ^= SLJIT_SINGLE_OP;
+
+       if (GET_OPCODE(op) == SLJIT_CMPD) {
+               /* For a compare, dst is the first operand and is only read. */
+               if (dst & SLJIT_MEM) {
+                       FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw));
+                       dst = TMP_FREG1;
+               }
+               if (src & SLJIT_MEM) {
+                       FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw));
+                       src = TMP_FREG2;
+               }
+               FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst) | DM4(src)));
+               /* VMRS follows the compare (presumably to copy the FPU status
+                  flags into the integer flags - confirm against the VMRS
+                  definition). */
+               return push_inst32(compiler, VMRS);
+       }
+
+       dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+       if (src & SLJIT_MEM) {
+               /* Load directly into the result register; a plain move then
+                  needs no further instruction. */
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw));
+               src = dst_r;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOVD:
+               if (src != dst_r)
+                       FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+               break;
+       case SLJIT_NEGD:
+               FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+               break;
+       case SLJIT_ABSD:
+               FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+               break;
+       }
+
+       if (!(dst & SLJIT_MEM))
+               return SLJIT_SUCCESS;
+       return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+}
+
+/* Emits a two-source floating point operation (add, subtract, multiply or
+   divide): dst = src1 <op> src2. Memory sources are loaded into TMP_FREG1
+   (src1) and TMP_FREG2 (src2); a memory destination is computed in TMP_FREG1
+   and stored afterwards.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       /* Invert the single precision bit: from here on (op & SLJIT_SINGLE_OP)
+          is set for double precision and is OR-ed into the *_F32 opcodes. */
+       op ^= SLJIT_SINGLE_OP;
+
+       dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
+               src1 = TMP_FREG1;
+       }
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
+               src2 = TMP_FREG2;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADDD:
+               FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       case SLJIT_SUBD:
+               FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       case SLJIT_MULD:
+               FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       case SLJIT_DIVD:
+               FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       }
+
+       if (!(dst & SLJIT_MEM))
+               return SLJIT_SUCCESS;
+       return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+}
+
+#undef FPU_LOAD
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+/* Entry of a fast-called code block: copies the value held in TMP_REG3
+   (presumably the return address of the fast call - confirm against the
+   register map) to dst. Nothing is emitted when dst is SLJIT_UNUSED.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (dst <= REG_MASK)
+               return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));
+
+       /* Memory. */
+       if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
+               return compiler->error;
+       /* TMP_REG3 is used for caching. */
+       /* The slow store may therefore overwrite it: move the value to TMP_REG2
+          first and store from there. */
+       FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
+}
+
+/* Return from a fast-called code block: brings the address given by src/srcw
+   (register, memory or immediate) into TMP_REG3 and emits a BLX through it.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (src <= REG_MASK)
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
+       else if (src & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
+                       FAIL_IF(compiler->error);
+               else {
+                       /* Slow load: go through TMP_REG2 and copy the result to
+                          TMP_REG3 afterwards (presumably because the slow path
+                          may use TMP_REG3 itself for address caching). */
+                       compiler->cache_arg = 0;
+                       compiler->cache_argw = 0;
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
+                       FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
+               }
+       }
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
+       return push_inst16(compiler, BLX | RN3(TMP_REG3));
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+/* Translates an sljit condition type into the 4-bit ARM condition code.
+   Types without an explicit entry (such as SLJIT_JUMP) map to 0xe. */
+static sljit_uw get_cc(sljit_si type)
+{
+       sljit_uw cond;
+
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+       case SLJIT_C_FLOAT_EQUAL:
+               cond = 0x0; /* EQ */
+               break;
+
+       case SLJIT_C_NOT_EQUAL:
+       case SLJIT_C_MUL_OVERFLOW:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               cond = 0x1; /* NE */
+               break;
+
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               cond = 0x2; /* CS / HS */
+               break;
+
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               cond = 0x3; /* CC / LO */
+               break;
+
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_FLOAT_UNORDERED:
+               cond = 0x6; /* VS */
+               break;
+
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_FLOAT_ORDERED:
+               cond = 0x7; /* VC */
+               break;
+
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               cond = 0x8; /* HI */
+               break;
+
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               cond = 0x9; /* LS */
+               break;
+
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               cond = 0xa; /* GE */
+               break;
+
+       case SLJIT_C_SIG_LESS:
+               cond = 0xb; /* LT */
+               break;
+
+       case SLJIT_C_SIG_GREATER:
+               cond = 0xc; /* GT */
+               break;
+
+       case SLJIT_C_SIG_LESS_EQUAL:
+               cond = 0xd; /* LE */
+               break;
+
+       default: /* SLJIT_JUMP */
+               cond = 0xe; /* AL */
+               break;
+       }
+
+       return cond;
+}
+
+/* Returns a label for the current position of the instruction stream. If the
+   most recently created label already sits at this position it is reused;
+   otherwise a new label is allocated and registered with the compiler.
+   Returns NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *result;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       result = compiler->last_label;
+       if (!result || result->size != compiler->size) {
+               /* No label at this position yet: create one. */
+               result = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+               PTR_FAIL_IF(!result);
+               set_label(result, compiler);
+       }
+       return result;
+}
+
+/* Emits a jump or call whose target is not known yet and returns the jump
+   record describing it. The emitted sequence loads a placeholder 32-bit
+   constant into TMP_REG1 and branches through that register; conditional
+   types put the branch inside an IT block.
+   Returns NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins cc;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       /* Keep only the low byte of type for the comparisons below. */
+       type &= 0xff;
+
+       /* In ARM, we don't need to touch the arguments. */
+       /* Placeholder for the target address. */
+       PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
+       if (type < SLJIT_JUMP) {
+               /* Conditional jump: remember the condition code in the jump
+                  flags (from bit 8 up) and open a one-instruction IT block. */
+               jump->flags |= IS_COND;
+               cc = get_cc(type);
+               jump->flags |= cc << 8;
+               PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+       }
+
+       /* Record where the branch instruction itself is located. */
+       jump->addr = compiler->size;
+       if (type <= SLJIT_JUMP)
+               PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
+       else {
+               /* Call types use the linking branch. */
+               jump->flags |= IS_BL;
+               PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
+       }
+
+       return jump;
+}
+
+/* Emits an indirect jump or call to the address given by src/srcw.
+   - Register source: a single BX / BLX through that register.
+   - Memory source: the address is loaded straight into TMP_PC for plain
+     jumps, or into TMP_REG1 followed by BLX for calls.
+   - Immediate source: a jump record with a fixed target (JUMP_ADDR) is
+     created and a placeholder constant load plus BX / BLX is emitted.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       /* In ARM, we don't need to touch the arguments. */
+       if (!(src & SLJIT_IMM)) {
+               if (FAST_IS_REG(src))
+                       return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
+
+               FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw));
+               if (type >= SLJIT_FAST_CALL)
+                       return push_inst16(compiler, BLX | RN3(TMP_REG1));
+               /* The load into TMP_PC already performed the jump. Falling
+                  through would allocate a jump record whose target is the
+                  memory offset srcw and emit an unreachable branch. */
+               if (type <= SLJIT_JUMP)
+                       return SLJIT_SUCCESS;
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       FAIL_IF(!jump);
+       set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
+       jump->u.target = srcw;
+
+       /* Placeholder for the target address, followed by the branch whose
+          position is recorded in the jump. */
+       FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
+       jump->addr = compiler->size;
+       return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
+}
+
+/* Materializes the condition `type` as a 0/1 value and either moves it to dst
+   (opcodes below SLJIT_ADD) or combines it with src using AND / OR / XOR and
+   writes the result to dst. The selection is done with IT blocks so that no
+   branch is emitted. When SLJIT_SET_E is requested, a flag-setting move of the
+   result into TMP_REG1 is appended.
+   Returns SLJIT_SUCCESS, or the compiler error code if an emit step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si dst_r, flags = GET_ALL_FLAGS(op);
+       sljit_ins cc, ins;
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       cc = get_cc(type);
+       /* Non-register destinations are computed in TMP_REG2 and stored. */
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+
+       if (op < SLJIT_ADD) {
+               /* Move form: IT block with a "then" and an "else" slot; the
+                  first instruction writes 1 when the condition holds, the
+                  second writes 0 otherwise. */
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
+               if (reg_map[dst_r] > 7) {
+                       FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
+                       FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
+               } else {
+                       FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
+                       FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
+               }
+               if (dst_r != TMP_REG2)
+                       return SLJIT_SUCCESS;
+               return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
+       }
+
+       ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI));
+       if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
+               /* Does not change the other bits. */
+               /* In-place OR / XOR with 1, executed only when the condition
+                  holds (single-instruction IT block). */
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1));
+               if (flags & SLJIT_SET_E) {
+                       /* The condition must always be set, even if the ORRI/EORI is not executed above. */
+                       if (reg_map[dst] <= 7)
+                               return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
+                       return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst));
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* General case: bring a memory or immediate source into TMP_REG2. */
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
+               src = TMP_REG2;
+               srcw = 0;
+       } else if (src & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+               src = TMP_REG2;
+               srcw = 0;
+       }
+
+       if (op == SLJIT_AND || src != dst_r) {
+               /* Both outcomes must write dst_r: "src <op> 1" when the
+                  condition holds, "src <op> 0" otherwise. */
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0));
+       }
+       else {
+               /* OR / XOR in place: only the "condition holds" case changes
+                  the value. */
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
+       }
+
+       if (dst_r == TMP_REG2)
+               FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0));
+
+       if (flags & SLJIT_SET_E) {
+               /* The condition must always be set, even if the ORR/EORI is not executed above. */
+               if (reg_map[dst_r] <= 7)
+                       return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
+               return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a 32-bit constant load of init_value into dst and returns the record
+   that identifies it. For a non-register destination the constant is loaded
+   into TMP_REG1 first and stored when dst is memory.
+   Returns NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *record;
+       sljit_si target;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       record = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!record);
+       set_const(record, compiler);
+
+       /* Load into the destination register when there is one, otherwise
+          into the temporary. */
+       if (SLOW_IS_REG(dst))
+               target = dst;
+       else
+               target = TMP_REG1;
+       PTR_FAIL_IF(emit_imm32_const(compiler, target, init_value));
+
+       if (!(dst & SLJIT_MEM))
+               return record;
+
+       PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, target, dst, dstw));
+       return record;
+}
+
+/* Rewrites the 32-bit constant load located at `addr` so that it yields
+   new_addr, then flushes the four modified halfwords from the cache. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_uh *code_ptr;
+
+       code_ptr = (sljit_uh*)addr;
+       modify_imm32_const(code_ptr, new_addr);
+       SLJIT_CACHE_FLUSH(code_ptr, code_ptr + 4);
+}
+
+/* Rewrites the 32-bit constant load located at `addr` so that it yields
+   new_constant, then flushes the four modified halfwords from the cache. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_uh *code_ptr;
+
+       code_ptr = (sljit_uh*)addr;
+       modify_imm32_const(code_ptr, new_constant);
+       SLJIT_CACHE_FLUSH(code_ptr, code_ptr + 4);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_32.c b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_32.c
new file mode 100644 (file)
index 0000000..cb7c695
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* mips 32-bit arch dependent functions. */
+
+/* Emits the shortest instruction sequence that loads imm into the register
+   addressed by dst_ar: one ORI, one ADDIU, or LUI optionally followed by ORI.
+   Returns SLJIT_SUCCESS or the failure reported by push_inst / FAIL_IF. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
+{
+       /* No bits above the low 16: a single ORI is enough. */
+       if ((imm & ~0xffff) == 0)
+               return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       /* Negative but not below SIMM_MIN: a single ADDIU is enough. */
+       if (imm >= SIMM_MIN && imm < 0)
+               return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       /* General case: upper half with LUI, lower half with ORI only when non-zero. */
+       FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));
+       if (!(imm & 0xffff))
+               return SLJIT_SUCCESS;
+       return push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
+}
+
+#define EMIT_LOGICAL(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
+       } \
+       else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
+       }
+
+#define EMIT_SHIFT(op_imm, op_v) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
+       } \
+       else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
+       }
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+                               return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+#else
+                               FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+                               return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+#endif
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+                               return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+#else
+                               FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+                               return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+#endif
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
+#else
+               if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+                       FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+                       return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+               }
+               /* Nearly all instructions are unmovable in the following sequence. */
+               FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+               /* Check zero. */
+               FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst)));
+               /* Loop for searching the highest bit. */
+               FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
+               FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
+               if (op & SLJIT_SET_E)
+                       return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
+#endif
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                               else {
+                                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+                               }
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               /* a + b >= a | b (otherwise, the carry should be set to 1). */
+               if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+
+       case SLJIT_ADDC:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                               else {
+                                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+                               }
+                       }
+                       FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+
+               FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               if (!(op & SLJIT_SET_C))
+                       return SLJIT_SUCCESS;
+
+               /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
+               FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               /* Set carry flag. */
+               return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG);
+
+       case SLJIT_SUB:
+               if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       if (op & SLJIT_SET_U)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
+                       if (op & SLJIT_SET_S) {
+                               FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
+                               FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+                               FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+
+       case SLJIT_SUBC:
+               if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2));
+
+               FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+
+       case SLJIT_MUL:
+               SLJIT_ASSERT(!(flags & SRC2_IMM));
+               if (!(op & SLJIT_SET_O)) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+                       return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
+#else
+                       FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
+                       return push_inst(compiler, MFLO | D(dst), DR(dst));
+#endif
+               }
+               FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
+               FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1));
+               FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+               FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2));
+               return push_inst(compiler, SUBU | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+
+       case SLJIT_AND:
+               EMIT_LOGICAL(ANDI, AND);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_OR:
+               EMIT_LOGICAL(ORI, OR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_XOR:
+               EMIT_LOGICAL(XORI, XOR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_SHL:
+               EMIT_SHIFT(SLL, SLLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT(SRL, SRLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT(SRA, SRAV);
+               return SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+/* Loads init_value into register dst with a LUI / ORI pair. Both instructions
+   are always emitted, even when one half of the value is zero (presumably so
+   the pair can be rewritten in place later - confirm against sljit_set_const).
+   Returns SLJIT_SUCCESS or the failure reported by push_inst / FAIL_IF. */
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
+{
+       sljit_ins load_upper = LUI | T(dst) | IMM(init_value >> 16);
+       sljit_ins or_lower = ORI | S(dst) | T(dst) | IMM(init_value);
+
+       FAIL_IF(push_inst(compiler, load_upper, DR(dst)));
+       return push_inst(compiler, or_lower, DR(dst));
+}
+
+/* Patches the two instructions at addr so that the low 16 bits of the first
+   hold bits 16-31 of new_addr and the low 16 bits of the second hold bits
+   0-15, then flushes both instructions from the cache. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *code = (sljit_ins*)addr;
+       sljit_uw upper_half = (new_addr >> 16) & 0xffff;
+       sljit_uw lower_half = new_addr & 0xffff;
+
+       /* Keep the top 16 bits of each instruction, replace its 16-bit immediate. */
+       code[0] = (code[0] & 0xffff0000) | upper_half;
+       code[1] = (code[1] & 0xffff0000) | lower_half;
+       SLJIT_CACHE_FLUSH(code, code + 2);
+}
+
+/* Patches the two instructions at addr so that the low 16 bits of the first
+   hold bits 16-31 of new_constant and the low 16 bits of the second hold bits
+   0-15, then flushes both instructions from the cache. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *code = (sljit_ins*)addr;
+       sljit_sw upper_half = (new_constant >> 16) & 0xffff;
+       sljit_sw lower_half = new_constant & 0xffff;
+
+       /* Keep the top 16 bits of each instruction, replace its 16-bit immediate. */
+       code[0] = (code[0] & 0xffff0000) | upper_half;
+       code[1] = (code[1] & 0xffff0000) | lower_half;
+       SLJIT_CACHE_FLUSH(code, code + 2);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_64.c b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_64.c
new file mode 100644 (file)
index 0000000..df22eba
--- /dev/null
@@ -0,0 +1,469 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* mips 64-bit arch dependent functions. */
+
+/* Emits the instruction sequence that loads the constant imm into the
+   register addressed by dst_ar. Shorter forms are tried first: a single ORI,
+   a single ADDIU, LUI (+ ORI) for values in the signed 32-bit range, and
+   otherwise a LUI/ORI pair followed by one or two shift (+ ORI) steps.
+   Returns SLJIT_SUCCESS or the failure reported by push_inst / FAIL_IF. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
+{
+       sljit_si shift = 32;
+       sljit_si shift2;
+       sljit_si inv = 0;
+       sljit_ins ins;
+       sljit_uw uimm;
+
+       /* No bits above the low 16: a single ORI is enough. */
+       if (!(imm & ~0xffff))
+               return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       /* Negative but not below SIMM_MIN: a single ADDIU is enough. */
+       if (imm < 0 && imm >= SIMM_MIN)
+               return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       /* Signed 32-bit range: LUI for bits 16-31, ORI only when the low 16 bits are non-zero. */
+       if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
+               FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));
+               return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
+       }
+
+       /* Zero extended number. */
+       /* For a negative imm the complement is normalised instead, so that the
+          leading bits are zeros; inv records that it must be complemented back. */
+       uimm = imm;
+       if (imm < 0) {
+               uimm = ~imm;
+               inv = 1;
+       }
+
+       /* Shift uimm left until its two top bits are 01 (asserted below).
+          shift is decreased by the number of positions uimm was moved. */
+       while (!(uimm & 0xff00000000000000l)) {
+               shift -= 8;
+               uimm <<= 8;
+       }
+
+       if (!(uimm & 0xf000000000000000l)) {
+               shift -= 4;
+               uimm <<= 4;
+       }
+
+       if (!(uimm & 0xc000000000000000l)) {
+               shift -= 2;
+               uimm <<= 2;
+       }
+
+       /* Went one position too far: the top bit must stay clear. */
+       if ((sljit_sw)uimm < 0) {
+               uimm >>= 1;
+               shift += 1;
+       }
+       SLJIT_ASSERT(((uimm & 0xc000000000000000l) == 0x4000000000000000l) && (shift > 0) && (shift <= 32));
+
+       if (inv)
+               uimm = ~uimm;
+
+       /* Load the top 32 bits of the normalised value: LUI for bits 48-63,
+          ORI for bits 32-47 when they are non-zero. */
+       FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(uimm >> 48), dst_ar));
+       if (uimm & 0x0000ffff00000000l)
+               FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 32), dst_ar));
+
+       /* Keep only the low `shift` bits of imm, i.e. the part not loaded yet. */
+       imm &= (1l << shift) - 1;
+       if (!(imm & ~0xffff)) {
+               /* The rest fits in 16 bits: one left shift by `shift` (DSLL32 when
+                  the amount is exactly 32), then an ORI when the rest is non-zero. */
+               ins = (shift == 32) ? DSLL32 : DSLL;
+               if (shift < 32)
+                       ins |= SH_IMM(shift);
+               FAIL_IF(push_inst(compiler, ins | TA(dst_ar) | DA(dst_ar), dst_ar));
+               return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
+       }
+
+       /* Two shifts need to be performed. */
+       uimm <<= 32;
+       shift2 = shift - 16;
+
+       /* Normalise the remaining bits so that the top bit of uimm is set
+          (asserted below); shift2 is decreased by the positions moved. */
+       while (!(uimm & 0xf000000000000000l)) {
+               shift2 -= 4;
+               uimm <<= 4;
+       }
+
+       if (!(uimm & 0xc000000000000000l)) {
+               shift2 -= 2;
+               uimm <<= 2;
+       }
+
+       if (!(uimm & 0x8000000000000000l)) {
+               shift2--;
+               uimm <<= 1;
+       }
+
+       SLJIT_ASSERT((uimm & 0x8000000000000000l) && (shift2 > 0) && (shift2 <= 16));
+
+       /* Shift by (shift - shift2), OR in the next 16 bits, shift by shift2,
+          then OR in whatever is left of the low shift2 bits. */
+       FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift - shift2), dst_ar));
+       FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 48), dst_ar));
+       FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift2), dst_ar));
+
+       imm &= (1l << shift2) - 1;
+       return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
+}
+
+/* Yields a unless SLJIT_INT_OP is set in the caller's op variable, in which
+   case it yields b. */
+#define SELECT_OP(a, b) \
+       (!(op & SLJIT_INT_OP) ? a : b)
+
+/* Emits a two-operand logical operation inside emit_single_op.
+   op_imm is the opcode used when SRC2_IMM is set in flags (src2 is an
+   immediate), op_norm the opcode used when src2 is a register.
+   When SLJIT_SET_E is requested the result is also written to EQUAL_FLAG;
+   the write to dst is emitted only when CHECK_FLAGS(SLJIT_SET_E) holds.
+   Relies on the caller's compiler, op, flags, dst, src1 and src2 variables.
+   NOTE(review): expands to a bare if/else, so it must only be used as a
+   full statement, never as the body of an unbraced if. */
+#define EMIT_LOGICAL(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
+       } \
+       else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
+       }
+
+/* Emits a shift operation inside emit_single_op.
+   Immediate shift amount (SRC2_IMM set in flags):
+     - src2 >= 32 selects op_dimm32 and reduces src2 by 32; this is only
+       allowed without SLJIT_INT_OP (asserted);
+     - otherwise op_imm is used with SLJIT_INT_OP and op_dimm without it.
+   Register shift amount: op_v is used with SLJIT_INT_OP and op_dv without it.
+   When SLJIT_SET_E is requested the result is also written to EQUAL_FLAG;
+   the write to dst is emitted only when CHECK_FLAGS(SLJIT_SET_E) holds.
+   Relies on the caller's compiler, op, flags, dst, src1, src2 and ins
+   variables, and modifies src2 and ins.
+   NOTE(review): expands to a bare if/else, so it must only be used as a
+   full statement, never as the body of an unbraced if. */
+#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
+       if (flags & SRC2_IMM) { \
+               if (src2 >= 32) { \
+                       SLJIT_ASSERT(!(op & SLJIT_INT_OP)); \
+                       ins = op_dimm32; \
+                       src2 -= 32; \
+               } \
+               else \
+                       ins = (op & SLJIT_INT_OP) ? op_imm : op_dimm; \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
+       } \
+       else { \
+               ins = (op & SLJIT_INT_OP) ? op_v : op_dv; \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
+       }
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       sljit_ins ins;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB) {
+                               FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+                               return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH) {
+                               FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+                               return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UI:
+               SLJIT_ASSERT(!(op & SLJIT_INT_OP));
+               FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
+               return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
+
+       case SLJIT_MOV_SI:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
+#else
+               if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+                       return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+               }
+               /* Nearly all instructions are unmovable in the following sequence. */
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+               /* Check zero. */
+               FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_INT_OP) ? 32 : 64), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
+               /* Loop for searching the highest bit. */
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
+               FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
+               if (op & SLJIT_SET_E)
+                       return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
+#endif
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                               else {
+                                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+                               }
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               /* a + b >= a | b (otherwise, the carry should be set to 1). */
+               if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+
+       case SLJIT_ADDC:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                               else {
+                                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+                               }
+                       }
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               if (!(op & SLJIT_SET_C))
+                       return SLJIT_SUCCESS;
+
+               /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
+               FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               /* Set carry flag. */
+               return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG);
+
+       case SLJIT_SUB:
+               if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       if (op & SLJIT_SET_U)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
+                       if (op & SLJIT_SET_S) {
+                               FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
+                               FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+
+       case SLJIT_SUBC:
+               if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2));
+
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+
+       case SLJIT_MUL:
+               SLJIT_ASSERT(!(flags & SRC2_IMM));
+               if (!(op & SLJIT_SET_O)) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+                       if (op & SLJIT_INT_OP)
+                               return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
+                       FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
+                       return push_inst(compiler, MFLO | D(dst), DR(dst));
+#else
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
+                       return push_inst(compiler, MFLO | D(dst), DR(dst));
+#endif
+               }
+               FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
+               FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1));
+               FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2));
+               return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+
+       case SLJIT_AND:
+               EMIT_LOGICAL(ANDI, AND);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_OR:
+               EMIT_LOGICAL(ORI, OR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_XOR:
+               EMIT_LOGICAL(XORI, XOR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_SHL:
+               EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV);
+               return SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+/* Emits the fixed six-instruction sequence that builds the 64-bit value
+   init_value in dst, sixteen bits at a time:
+       LUI, ORI, DSLL 16, ORI, DSLL 16, ORI
+   The 16-bit immediates therefore sit in instructions 0, 1, 3 and 5 of the
+   sequence, which are the slots sljit_set_const rewrites later.
+   Returns the result of the last push_inst; earlier failures are propagated
+   through FAIL_IF. */
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
+{
+       /* The four 16-bit slices of the constant, most significant first. */
+       sljit_ins bits_63_48 = IMM(init_value >> 48);
+       sljit_ins bits_47_32 = IMM(init_value >> 32);
+       sljit_ins bits_31_16 = IMM(init_value >> 16);
+       sljit_ins bits_15_0 = IMM(init_value);
+       /* Every shift in the sequence moves dst left by 16 bits. */
+       sljit_ins shift_left_16 = DSLL | T(dst) | D(dst) | SH_IMM(16);
+
+       FAIL_IF(push_inst(compiler, LUI | T(dst) | bits_63_48, DR(dst)));
+       FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | bits_47_32, DR(dst)));
+       FAIL_IF(push_inst(compiler, shift_left_16, DR(dst)));
+       FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | bits_31_16, DR(dst)));
+       FAIL_IF(push_inst(compiler, shift_left_16, DR(dst)));
+       return push_inst(compiler, ORI | S(dst) | T(dst) | bits_15_0, DR(dst));
+}
+
+/* Redirects a patchable jump to new_addr.
+   addr is the address of the first word of the jump's six-instruction
+   address-load sequence. The low 16 bits of words 0, 1, 3 and 5 are replaced
+   with bits 63-48, 47-32, 31-16 and 15-0 of new_addr respectively; words 2
+   and 4 are left untouched. The six words are then passed to
+   SLJIT_CACHE_FLUSH. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       /* Word index inside the sequence for each 16-bit slice, high slice first. */
+       static SLJIT_CONST sljit_ub imm_slot[4] = { 0, 1, 3, 5 };
+       sljit_ins *seq = (sljit_ins*)addr;
+       sljit_si i;
+
+       for (i = 0; i < 4; i++)
+               seq[imm_slot[i]] = (seq[imm_slot[i]] & 0xffff0000) | ((new_addr >> (48 - 16 * i)) & 0xffff);
+
+       SLJIT_CACHE_FLUSH(seq, seq + 6);
+}
+
+/* Replaces the value loaded by a patchable constant.
+   addr is the address of the first word of a six-instruction load sequence
+   with the layout produced by emit_const. The low 16 bits of words 0, 1, 3
+   and 5 receive bits 63-48, 47-32, 31-16 and 15-0 of new_constant; words 2
+   and 4 are left untouched. The six words are then passed to
+   SLJIT_CACHE_FLUSH. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       /* Word index inside the sequence for each 16-bit slice, high slice first. */
+       static SLJIT_CONST sljit_ub imm_slot[4] = { 0, 1, 3, 5 };
+       sljit_ins *seq = (sljit_ins*)addr;
+       sljit_si i;
+
+       for (i = 0; i < 4; i++)
+               seq[imm_slot[i]] = (seq[imm_slot[i]] & 0xffff0000) | ((new_constant >> (48 - 16 * i)) & 0xffff);
+
+       SLJIT_CACHE_FLUSH(seq, seq + 6);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c
new file mode 100644 (file)
index 0000000..011d887
--- /dev/null
@@ -0,0 +1,2042 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Latest MIPS architecture. */
+/* Automatically detect SLJIT_MIPS_32_64 */
+
+/* Returns a static string naming the MIPS variant this file was built for:
+   "MIPS V" when SLJIT_MIPS_32_64 is enabled, "MIPS III" otherwise. In both
+   cases SLJIT_CPUINFO is appended by string-literal concatenation. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       SLJIT_CONST char *platform;
+
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+       platform = "MIPS V" SLJIT_CPUINFO;
+#else
+       platform = "MIPS III" SLJIT_CPUINFO;
+#endif
+       return platform;
+}
+
+/* Length of an instruction word
+   Both for mips-32 and mips-64 */
+typedef sljit_ui sljit_ins;
+
+/* Temporary registers. These are indexes into reg_map (placed directly after
+   the SLJIT_NO_REGISTERS public registers), not machine register numbers. */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
+
+/* For position independent code, t9 must contain the function address. */
+#define PIC_ADDR_REG   TMP_REG2
+
+/* TMP_EREGs are used mainly for arithmetic operations.
+   They are absolute register numbers and are encoded with SA/TA/DA. */
+#define TMP_EREG1      15
+#define TMP_EREG2      24
+/* Floating point status register. */
+#define FCSR_REG       31
+/* Return address register. */
+#define RETURN_ADDR_REG        31
+
+/* Flags are kept in volatile registers.
+   The values below are absolute register numbers (used with SA/TA/DA). */
+#define EQUAL_FLAG     7
+/* ULESS_FLAG also serves as the carry flag. */
+#define ULESS_FLAG     10
+#define UGREATER_FLAG  11
+#define LESS_FLAG      12
+#define GREATER_FLAG   13
+#define OVERFLOW_FLAG  14
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      ((SLJIT_FLOAT_REG6 + 1) << 1)
+
+/* Translates an sljit register index (as passed to S/T/D/DR) into the
+   register number placed in the instruction word. Index 0 maps to register 0.
+   NOTE(review): the last three entries are presumably TMP_REG1..TMP_REG3;
+   this depends on the value of SLJIT_NO_REGISTERS - confirm. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
+       0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* Register operand fields. The argument is an sljit register index which is
+   translated through reg_map before being shifted into place
+   (bit 21, bit 16 and bit 11 respectively). */
+#define S(s)           (reg_map[s] << 21)
+#define T(t)           (reg_map[t] << 16)
+#define D(d)           (reg_map[d] << 11)
+/* Absolute registers. */
+/* Same field positions as S/T/D, but the argument is used as-is (no reg_map lookup). */
+#define SA(s)          ((s) << 21)
+#define TA(t)          ((t) << 16)
+#define DA(d)          ((d) << 11)
+/* Floating point register fields (bit 16, bit 11 and bit 6). */
+#define FT(t)          ((t) << 16)
+#define FS(s)          ((s) << 11)
+#define FD(d)          ((d) << 6)
+/* 16-bit immediate in the low half of the instruction word. */
+#define IMM(imm)       ((imm) & 0xffff)
+/* Shift amount field, starting at bit 6. The argument is not masked. */
+#define SH_IMM(imm)    ((imm) << 6)
+
+/* Translated register number of an sljit register; this is the form passed
+   to push_inst as its delay_slot argument. */
+#define DR(dr)         (reg_map[dr])
+/* HI() places an opcode at bit 26; LO() leaves it in the low bits of the word. */
+#define HI(opcode)     ((opcode) << 26)
+#define LO(opcode)     (opcode)
+/* S = (16 << 21) D = (17 << 21) */
+#define FMT_SD         (16 << 21)
+
+#define ABS_fmt                (HI(17) | FMT_SD | LO(5))
+#define ADD_fmt                (HI(17) | FMT_SD | LO(0))
+#define ADDIU          (HI(9))
+#define ADDU           (HI(0) | LO(33))
+#define AND            (HI(0) | LO(36))
+#define ANDI           (HI(12))
+#define B              (HI(4))
+#define BAL            (HI(1) | (17 << 16))
+#define BC1F           (HI(17) | (8 << 21))
+#define BC1T           (HI(17) | (8 << 21) | (1 << 16))
+#define BEQ            (HI(4))
+#define BGEZ           (HI(1) | (1 << 16))
+#define BGTZ           (HI(7))
+#define BLEZ           (HI(6))
+#define BLTZ           (HI(1) | (0 << 16))
+#define BNE            (HI(5))
+#define BREAK          (HI(0) | LO(13))
+#define CFC1           (HI(17) | (2 << 21))
+#define C_UN_fmt       (HI(17) | FMT_SD | LO(49))
+#define C_UEQ_fmt      (HI(17) | FMT_SD | LO(51))
+#define C_ULE_fmt      (HI(17) | FMT_SD | LO(55))
+#define C_ULT_fmt      (HI(17) | FMT_SD | LO(53))
+#define DADDIU         (HI(25))
+#define DADDU          (HI(0) | LO(45))
+#define DDIV           (HI(0) | LO(30))
+#define DDIVU          (HI(0) | LO(31))
+#define DIV            (HI(0) | LO(26))
+#define DIVU           (HI(0) | LO(27))
+#define DIV_fmt                (HI(17) | FMT_SD | LO(3))
+#define DMULT          (HI(0) | LO(28))
+#define DMULTU         (HI(0) | LO(29))
+#define DSLL           (HI(0) | LO(56))
+#define DSLL32         (HI(0) | LO(60))
+#define DSLLV          (HI(0) | LO(20))
+#define DSRA           (HI(0) | LO(59))
+#define DSRA32         (HI(0) | LO(63))
+#define DSRAV          (HI(0) | LO(23))
+#define DSRL           (HI(0) | LO(58))
+#define DSRL32         (HI(0) | LO(62))
+#define DSRLV          (HI(0) | LO(22))
+#define DSUBU          (HI(0) | LO(47))
+#define J              (HI(2))
+#define JAL            (HI(3))
+#define JALR           (HI(0) | LO(9))
+#define JR             (HI(0) | LO(8))
+#define LD             (HI(55))
+#define LUI            (HI(15))
+#define LW             (HI(35))
+#define MFHI           (HI(0) | LO(16))
+#define MFLO           (HI(0) | LO(18))
+#define MOV_fmt                (HI(17) | FMT_SD | LO(6))
+#define MUL_fmt                (HI(17) | FMT_SD | LO(2))
+#define MULT           (HI(0) | LO(24))
+#define MULTU          (HI(0) | LO(25))
+#define NEG_fmt                (HI(17) | FMT_SD | LO(7))
+#define NOP            (HI(0) | LO(0))
+#define NOR            (HI(0) | LO(39))
+#define OR             (HI(0) | LO(37))
+#define ORI            (HI(13))
+#define SD             (HI(63))
+#define SLT            (HI(0) | LO(42))
+#define SLTI           (HI(10))
+#define SLTIU          (HI(11))
+#define SLTU           (HI(0) | LO(43))
+#define SLL            (HI(0) | LO(0))
+#define SLLV           (HI(0) | LO(4))
+#define SRL            (HI(0) | LO(2))
+#define SRLV           (HI(0) | LO(6))
+#define SRA            (HI(0) | LO(3))
+#define SRAV           (HI(0) | LO(7))
+#define SUB_fmt                (HI(17) | FMT_SD | LO(1))
+#define SUBU           (HI(0) | LO(35))
+#define SW             (HI(43))
+#define XOR            (HI(0) | LO(38))
+#define XORI           (HI(14))
+
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+#define CLZ            (HI(28) | LO(32))
+#define DCLZ           (HI(28) | LO(36))
+#define MUL            (HI(28) | LO(2))
+#define SEB            (HI(31) | (16 << 6) | LO(32))
+#define SEH            (HI(31) | (24 << 6) | LO(32))
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define ADDU_W         ADDU
+#define ADDIU_W                ADDIU
+#define SLL_W          SLL
+#define SUBU_W         SUBU
+#else
+#define ADDU_W         DADDU
+#define ADDIU_W                DADDIU
+#define SLL_W          DSLL
+#define SUBU_W         DSUBU
+#endif
+
+#define SIMM_MAX       (0x7fff)
+#define SIMM_MIN       (-0x8000)
+#define UIMM_MAX       (0xffff)
+
+/* Appends the instruction word ins to the compiler's instruction stream.
+
+   delay_slot is recorded in compiler->delay_slot and is useful for
+   reordering instructions in the delay slot. As the assertion checks, it
+   must be MOVABLE_INS, a value >= UNMOVABLE_INS, or the absolute number of
+   the register found in bits 11-15 or bits 16-20 of ins.
+
+   Returns SLJIT_SUCCESS on success. If ensure_buf yields no space, the
+   failure is reported through FAIL_IF and nothing is stored. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot)
+{
+       /* Declared before the assertion: C89 does not allow a declaration
+          after a statement. */
+       sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+
+       SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS
+               || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       compiler->delay_slot = delay_slot;
+       return SLJIT_SUCCESS;
+}
+
+/* Returns the single-bit mask that detect_jump_type XORs into a conditional
+   branch instruction to invert it: bit 26 when IS_BIT26_COND is set in
+   flags, bit 16 otherwise. */
+static SLJIT_INLINE sljit_ins invert_branch(sljit_si flags)
+{
+       if (flags & IS_BIT26_COND)
+               return 1 << 26;
+
+       return 1 << 16;
+}
+
+/* Tries to replace the generic jump sequence of `jump` with a shorter form.
+
+   jump->addr must already hold the address of the jump's sequence inside the
+   output buffer (sljit_generate_code sets it just before calling). code_ptr
+   is the current output position and code is the start of the output buffer.
+
+   When a shorter form fits, the instructions are rewritten in place, one of
+   PATCH_B / PATCH_J / PATCH_ABS32 / PATCH_ABS48 is set in jump->flags and
+   jump->addr is adjusted where needed; the displacement or address itself is
+   filled in later by sljit_generate_code.
+
+   Returns the new value for the caller's code_ptr (the caller increments it
+   afterwards); code_ptr is returned unchanged when nothing was shortened. */
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_ins *inst;
+       sljit_ins saved_inst;
+
+       /* Rewritable jumps (and, on 32-bit, calls) always keep the full sequence. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+       if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
+               return code_ptr;
+#else
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return code_ptr;
+#endif
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               /* label->size is used here as an offset in instruction words from code. */
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+       inst = (sljit_ins*)jump->addr;
+       /* NOTE(review): for conditional jumps the word before the sequence is
+          presumably the inverted conditional branch that skips it - confirm
+          against the jump emitter. */
+       if (jump->flags & IS_COND)
+               inst--;
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       /* On 64-bit, calls skip the B/J forms and may only get a shorter
+          absolute address load. */
+       if (jump->flags & IS_CALL)
+               goto keep_address;
+#endif
+
+       /* B instructions. */
+       /* diff is measured in instruction words and must fit a signed 16-bit field. */
+       if (jump->flags & IS_MOVABLE) {
+               /* The word at inst[-1] is moved behind the branch, so the branch
+                  itself ends up at inst[-1]. */
+               diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+               if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       jump->flags |= PATCH_B;
+
+                       if (!(jump->flags & IS_COND)) {
+                               inst[0] = inst[-1];
+                               inst[-1] = (jump->flags & IS_JAL) ? BAL : B;
+                               jump->addr -= sizeof(sljit_ins);
+                               return inst;
+                       }
+                       /* Conditional: swap the two words and flip the branch condition. */
+                       saved_inst = inst[0];
+                       inst[0] = inst[-1];
+                       inst[-1] = saved_inst ^ invert_branch(jump->flags);
+                       jump->addr -= 2 * sizeof(sljit_ins);
+                       return inst;
+               }
+       }
+       else {
+               /* Nothing can be moved: the branch stays at inst[0] and is followed by a NOP. */
+               diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2;
+               if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       jump->flags |= PATCH_B;
+
+                       if (!(jump->flags & IS_COND)) {
+                               inst[0] = (jump->flags & IS_JAL) ? BAL : B;
+                               inst[1] = NOP;
+                               return inst + 1;
+                       }
+                       inst[0] = inst[0] ^ invert_branch(jump->flags);
+                       inst[1] = NOP;
+                       jump->addr -= sizeof(sljit_ins);
+                       return inst + 1;
+               }
+       }
+
+       /* The J forms below require the target and the compared address to
+          agree in every bit above the low 28. */
+       if (jump->flags & IS_COND) {
+               if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       saved_inst = inst[0];
+                       inst[0] = inst[-1];
+                       /* The branch keeps its upper half; its 16-bit offset field becomes 3. */
+                       inst[-1] = (saved_inst & 0xffff0000) | 3;
+                       inst[1] = J;
+                       inst[2] = NOP;
+                       return inst + 2;
+               }
+               else if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = (inst[0] & 0xffff0000) | 3;
+                       inst[1] = NOP;
+                       inst[2] = J;
+                       inst[3] = NOP;
+                       jump->addr += sizeof(sljit_ins);
+                       return inst + 3;
+               }
+       }
+       else {
+               /* J instructions. */
+               if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = inst[-1];
+                       inst[-1] = (jump->flags & IS_JAL) ? JAL : J;
+                       jump->addr -= sizeof(sljit_ins);
+                       return inst;
+               }
+
+               if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = (jump->flags & IS_JAL) ? JAL : J;
+                       inst[1] = NOP;
+                       return inst + 1;
+               }
+       }
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+keep_address:
+       /* Shorter absolute address loads for small targets. Words 6 and 7 of
+          the sequence are copied down over the now unused slots.
+          NOTE(review): the "-= 4" / "-= 2" on the leading conditional branch
+          presumably shrink its offset by the number of dropped words - confirm. */
+       if (target_addr <= 0x7fffffff) {
+               jump->flags |= PATCH_ABS32;
+               if (jump->flags & IS_COND) {
+                       inst[0] -= 4;
+                       inst++;
+               }
+               inst[2] = inst[6];
+               inst[3] = inst[7];
+               return inst + 3;
+       }
+       if (target_addr <= 0x7fffffffffffl) {
+               jump->flags |= PATCH_ABS48;
+               if (jump->flags & IS_COND) {
+                       inst[0] -= 2;
+                       inst++;
+               }
+               inst[4] = inst[6];
+               inst[5] = inst[7];
+               return inst + 5;
+       }
+#endif
+
+       /* No shorter form applies: keep the full sequence. */
+       return code_ptr;
+}
+
+#ifdef __GNUC__
+/* Non-inlined wrapper around SLJIT_CACHE_FLUSH for the range
+   [code, code_ptr). sljit_generate_code calls it instead of the macro when
+   building with GCC, as a workaround for invalid code generation with -O2. */
+static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr)
+{
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+}
+#endif
+
+/* Produces the final machine code for everything emitted into `compiler`.
+
+   The instruction words are copied from the compiler's buffer chain into a
+   block obtained from SLJIT_MALLOC_EXEC. During the copy, label and constant
+   addresses are recorded and every jump is offered to detect_jump_type for
+   shortening. A second pass writes the branch displacements / target
+   addresses into the jump instructions. Finally compiler->error is set to
+   SLJIT_ERR_COMPILED, compiler->executable_size is set to the number of
+   bytes actually used, and the instruction cache is flushed for that range.
+
+   Returns the start of the generated code.
+   NOTE(review): CHECK_ERROR_PTR and PTR_FAIL_WITH_EXEC_IF presumably return
+   NULL on a previous error / failed allocation - confirm in sljitLir.c. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       /* Sized for the unshortened instruction count; shortening jumps can
+          only make the result smaller (see the size assertion below). */
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       /* Pass 1: copy the instructions. word_count counts words read from the
+          buffers, code_ptr is the write position; the two diverge once a jump
+          has been shortened. */
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               /* label->size becomes the offset in words inside the final code. */
+                               label->addr = (sljit_uw)code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+                               /* Step back 3 (32-bit) or 7 (64-bit) words from the word just
+                                  copied. NOTE(review): presumably the first word of the
+                                  jump's address-load sequence - confirm against the emitter. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+                               jump->addr = (sljit_uw)(code_ptr - 3);
+#else
+                               jump->addr = (sljit_uw)(code_ptr - 7);
+#endif
+                               /* May rewrite the sequence in place and move code_ptr backwards. */
+                               code_ptr = detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               /* Just recording the address. */
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr ++;
+                       word_count ++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       /* A label may sit right after the last instruction. */
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       /* Pass 2: all label addresses are known now, so fill in the jump targets.
+          The do/while(0) only exists so that "break" can end the handling of a jump. */
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+
+                       if (jump->flags & PATCH_B) {
+                               /* Displacement in words, relative to the word after the branch. */
+                               addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff);
+                               break;
+                       }
+                       if (jump->flags & PATCH_J) {
+                               /* Word index of the target goes into the low 26 bits. */
+                               SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff));
+                               buf_ptr[0] |= (addr >> 2) & 0x03ffffff;
+                               break;
+                       }
+
+                       /* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+                       buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+#else
+                       if (jump->flags & PATCH_ABS32) {
+                               SLJIT_ASSERT(addr <= 0x7fffffff);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+                       }
+                       else if (jump->flags & PATCH_ABS48) {
+                               SLJIT_ASSERT(addr <= 0x7fffffffffffl);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
+                       }
+                       else {
+                               /* Full 64-bit address: the same slots sljit_set_jump_addr patches. */
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                               buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[5] = (buf_ptr[5] & 0xffff0000) | (addr & 0xffff);
+                       }
+#endif
+               } while (0);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+#ifndef __GNUC__
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+#else
+       /* GCC workaround for invalid code generation with -O2. */
+       sljit_cache_flush(code, code_ptr);
+#endif
+       return code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Creates an index in data_transfer_insts array. */
+/* Bit 0 selects a load (set) or a store (clear), bits 1-2 select the operand
+   size and bit 3 selects the sign extending variant of the load. */
+#define LOAD_DATA      0x01
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x02
+#define HALF_DATA      0x04
+#define INT_DATA       0x06
+#define SIGNED_DATA    0x08
+/* Separates integer and floating point registers */
+/* Table indices up to GPR_REG are integer transfers; DOUBLE_DATA selects
+   the floating point entries which follow them in the table. */
+#define GPR_REG                0x0f
+#define DOUBLE_DATA    0x10
+
+/* Extracts the data_transfer_insts index from a flags word. */
+#define MEM_MASK       0x1f
+
+/* The remaining bits are control flags, not part of the table index.
+   WRITE_BACK: the base register is updated with the accessed address.
+   ARG_TEST: getput_arg_fast only reports whether the fast form applies.
+   ALT_KEEP_CACHE: emit_op does not reset the address cache.
+   CUMULATIVE_OP: emit_op may swap the two source operands.
+   LOGICAL_OP: immediates are range checked against UIMM_MAX.
+   IMM_OP: the operation may take an immediate operand.
+   SRC2_IMM: set by emit_op when the second source is passed as an immediate. */
+#define WRITE_BACK     0x00020
+#define ARG_TEST       0x00040
+#define ALT_KEEP_CACHE 0x00080
+#define CUMULATIVE_OP  0x00100
+#define LOGICAL_OP     0x00200
+#define IMM_OP         0x00400
+#define SRC2_IMM       0x00800
+
+/* Operand classification bits computed inside emit_op. The SLOW_ variants
+   mark memory operands which getput_arg_fast could not handle. */
+#define UNUSED_DEST    0x01000
+#define REG_DEST       0x02000
+#define REG1_SOURCE    0x04000
+#define REG2_SOURCE    0x08000
+#define SLOW_SRC1      0x10000
+#define SLOW_SRC2      0x20000
+#define SLOW_DEST      0x40000
+
+/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */
+/* Expects variables named flags and op to be in scope at the point of use. */
+#define CHECK_FLAGS(list) \
+       (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
+
+/* Opcodes used by the entry / exit code to save and restore one machine
+   word on the stack: SW / LW on 32 bit MIPS, SD / LD otherwise. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define STACK_STORE    SW
+#define STACK_LOAD     LW
+#else
+#define STACK_STORE    SD
+#define STACK_LOAD     LD
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#include "sljitNativeMIPS_32.c"
+#else
+#include "sljitNativeMIPS_64.c"
+#endif
+
+/* Emits the function entry sequence: allocates the stack frame, stores the
+   return address and up to five saved registers at the top of the frame,
+   and copies up to three incoming arguments into the saved registers.
+   Returns SLJIT_SUCCESS, or the compiler error code when emitting fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_ins base;
+
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* Extra room: one word per saved register, one word for the return
+          address and four more words (NOTE(review): presumably the argument
+          area required by the calling convention - confirm). The total is
+          rounded up to 16 bytes (32 bit) or 32 bytes (64 bit). */
+       local_size += (saveds + 1 + 4) * sizeof(sljit_sw);
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+       local_size = (local_size + 15) & ~0xf;
+#else
+       local_size = (local_size + 31) & ~0x1f;
+#endif
+       compiler->local_size = local_size;
+
+       if (local_size <= SIMM_MAX) {
+               /* Frequent case. */
+               /* The frame size fits into an immediate: the stores below are
+                  addressed relative to the new stack pointer. */
+               FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG)));
+               base = S(SLJIT_LOCALS_REG);
+       }
+       else {
+               /* Large frame: keep the old stack pointer in TMP_REG2 and use
+                  it as base, so the stores below use small negative offsets
+                  (local_size is reset to 0 for that purpose). */
+               FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+               FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+               FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG)));
+               base = S(TMP_REG2);
+               local_size = 0;
+       }
+
+       /* Slot 1 below the top of the frame holds the return address, slots
+          2..6 hold the saved registers. sljit_emit_return reads the same slots. */
+       FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
+       if (saveds >= 1)
+               FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
+       if (saveds >= 2)
+               FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
+       if (saveds >= 3)
+               FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
+       if (saveds >= 4)
+               FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
+       if (saveds >= 5)
+               FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
+
+       /* Copy the incoming arguments from absolute registers 4, 5 and 6
+          (adding absolute register 0, presumably the MIPS zero register)
+          into the first saved registers. */
+       if (args >= 1)
+               FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1)));
+       if (args >= 2)
+               FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2)));
+       if (args >= 3)
+               FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3)));
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       local_size += (saveds + 1 + 4) * sizeof(sljit_sw);
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+       compiler->local_size = (local_size + 15) & ~0xf;
+#else
+       compiler->local_size = (local_size + 31) & ~0x1f;
+#endif
+}
+
+/* Emits the function exit sequence: after emit_mov_before_return has
+   handled the (op, src, srcw) operand, reloads the return address and the
+   saved registers from the slots written by sljit_emit_enter, jumps to the
+   return address and releases the stack frame.
+   Returns the result of the last push_inst, or the compiler error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si local_size;
+       sljit_ins base;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       local_size = compiler->local_size;
+       if (local_size <= SIMM_MAX)
+               base = S(SLJIT_LOCALS_REG);
+       else {
+               /* Large frame: TMP_REG1 = stack pointer + frame size, which is
+                  the top of the frame. The loads below then use small negative
+                  offsets, and TMP_REG1 becomes the new stack pointer at the end. */
+               FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+               FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
+               base = S(TMP_REG1);
+               local_size = 0;
+       }
+
+       /* Same slot layout as in sljit_emit_enter: slot 1 is the return
+          address, slots 2..6 are the saved registers. */
+       FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG));
+       if (compiler->saveds >= 5)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG2)));
+       if (compiler->saveds >= 4)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG1)));
+       if (compiler->saveds >= 3)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG3)));
+       if (compiler->saveds >= 2)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG2)));
+       if (compiler->saveds >= 1)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG1)));
+
+       /* The jump and the stack pointer adjustment are both emitted as
+          UNMOVABLE_INS so they stay adjacent in this order
+          (NOTE(review): the adjustment presumably executes in the branch
+          delay slot of the jump - confirm against push_inst). */
+       FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
+       if (compiler->local_size <= SIMM_MAX)
+               return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS);
+       else
+               return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS);
+}
+
+#undef STACK_STORE
+#undef STACK_LOAD
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* ARCH_32_64(a, b) selects a on 32 bit MIPS and b otherwise. It is only
+   needed while the table below is defined. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define ARCH_32_64(a, b)       a
+#else
+#define ARCH_32_64(a, b)       b
+#endif
+
+/* Load / store opcodes indexed by (flags & MEM_MASK).
+   Row labels: first column u = unsigned, s = signed; second column
+   w = word, b = byte, h = half, i = int; third column s = store, l = load.
+   The last four rows are the floating point transfers (d = double,
+   s = single precision). */
+static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = {
+/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */),
+/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */),
+/* u b s */ HI(40) /* sb */,
+/* u b l */ HI(36) /* lbu */,
+/* u h s */ HI(41) /* sh */,
+/* u h l */ HI(37) /* lhu */,
+/* u i s */ HI(43) /* sw */,
+/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */),
+
+/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */),
+/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */),
+/* s b s */ HI(40) /* sb */,
+/* s b l */ HI(32) /* lb */,
+/* s h s */ HI(41) /* sh */,
+/* s h l */ HI(33) /* lh */,
+/* s i s */ HI(43) /* sw */,
+/* s i l */ HI(35) /* lw */,
+
+/* d   s */ HI(61) /* sdc1 */,
+/* d   l */ HI(53) /* ldc1 */,
+/* s   s */ HI(57) /* swc1 */,
+/* s   l */ HI(49) /* lwc1 */,
+};
+
+#undef ARCH_32_64
+
+/* reg_ar is an absolute register! */
+
+/* Tries to perform the memory access with a single instruction.
+   Returns 0 when the operand needs the slow path (getput_arg), 1 when
+   ARG_TEST is set and the fast form is available (nothing is emitted),
+   and -1 after the instruction has been emitted. On emit failure the
+   compiler error code is returned through FAIL_IF. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       sljit_si delay_slot;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* A write back with a base register always takes the slow path. */
+       if ((flags & WRITE_BACK) && (arg & REG_MASK))
+               return 0;
+
+       /* So does an index register or an offset outside the signed immediate range. */
+       if ((arg & OFFS_REG_MASK) || argw > SIMM_MAX || argw < SIMM_MIN)
+               return 0;
+
+       /* Works for both absolute and relative addresses. */
+       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+               return 1;
+
+       /* Integer loads report the loaded register, everything else is movable. */
+       if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
+               delay_slot = reg_ar;
+       else
+               delay_slot = MOVABLE_INS;
+
+       FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & REG_MASK)
+               | TA(reg_ar) | IMM(argw), delay_slot));
+       return -1;
+}
+
+/* Tells whether the address computation of (arg, argw) can be reused for
+   (next_arg, next_argw). Returns 1 if so, 0 otherwise. See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw shift;
+       sljit_sw diff;
+
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       if (arg & OFFS_REG_MASK) {
+               /* Indexed operand: only a non-zero shift amount shared by both
+                  operands is worth caching, and the operands must either be
+                  identical or use the same index register. */
+               shift = argw & 0x3;
+               if (!shift || shift != (next_argw & 0x3))
+                       return 0;
+               if (arg != next_arg && (arg & OFFS_REG_MASK) != (next_arg & OFFS_REG_MASK))
+                       return 0;
+               return 1;
+       }
+
+       /* Base + offset operand: the same operand form is required ... */
+       if (arg != next_arg)
+               return 0;
+
+       /* ... and the two offsets must be within signed immediate distance. */
+       diff = next_argw - argw;
+       if (diff > SIMM_MAX || diff < SIMM_MIN)
+               return 0;
+       return 1;
+}
+
+/* Emit the necessary instructions. See can_cache above. */
+/* Slow path memory access for the operand (arg, argw); reg_ar is the
+   absolute register which is loaded or stored. (next_arg, next_argw)
+   describes the memory operand accessed next, so that a partial address
+   can be left in TMP_REG3 for it. The cache state is kept in
+   compiler->cache_arg / compiler->cache_argw:
+     cache_arg == SLJIT_MEM: TMP_REG3 holds the constant cache_argw
+     cache_arg == SLJIT_MEM | index register: TMP_REG3 holds the index
+       register shifted left by cache_argw
+     cache_arg == a full operand: TMP_REG3 holds its computed address
+   Returns the result of the last push_inst or the compiler error code. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_ar, base, delay_slot;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       /* An integer load may build the address in its own target register.
+          Stores and floating point transfers use TMP_REG1 for that purpose. */
+       if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
+               tmp_ar = reg_ar;
+               delay_slot = reg_ar;
+       } else {
+               tmp_ar = DR(TMP_REG1);
+               delay_slot = MOVABLE_INS;
+       }
+       base = arg & REG_MASK;
+
+       /* Case 1: base register + (index register << shift). */
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3;
+               if ((flags & WRITE_BACK) && reg_ar == DR(base)) {
+                       /* The stored register is also the base which is about to
+                          be updated: store a copy of its original value instead. */
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
+                       FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+                       reg_ar = DR(TMP_REG1);
+               }
+
+               /* Using the cache. */
+               if (argw == compiler->cache_argw) {
+                       if (!(flags & WRITE_BACK)) {
+                               /* Full address is cached. */
+                               if (arg == compiler->cache_arg)
+                                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+                               /* Only the shifted index is cached. */
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                                               /* The next operand is the same: promote the
+                                                  cache to the full address. */
+                                               compiler->cache_arg = arg;
+                                               compiler->cache_argw = argw;
+                                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
+                                               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+                                       }
+                                       FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
+                                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+                               }
+                       }
+                       else {
+                               /* Write back: add the cached shifted index to the base itself. */
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
+                                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+                               }
+                       }
+               }
+
+               /* No usable cache entry. A non-zero shift is computed into
+                  TMP_REG3 and recorded as the cached shifted index. */
+               if (SLJIT_UNLIKELY(argw)) {
+                       compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                       compiler->cache_argw = argw;
+                       FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3)));
+               }
+
+               if (!(flags & WRITE_BACK)) {
+                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                               compiler->cache_arg = arg;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
+                               tmp_ar = DR(TMP_REG3);
+                       }
+                       else
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar));
+                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+               }
+               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(base), DR(base)));
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+       }
+
+       /* Case 2: base register + constant with write back. */
+       if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
+               /* Update only applies if a base register exists. */
+               if (reg_ar == DR(base)) {
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
+                       if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
+                               /* Store first using the immediate offset, then update the base. */
+                               FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS));
+                               if (argw)
+                                       return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base));
+                               return SLJIT_SUCCESS;
+                       }
+                       /* Large offset: keep a copy of the value to be stored. */
+                       FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+                       reg_ar = DR(TMP_REG1);
+               }
+
+               if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
+                       if (argw)
+                               FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)));
+               }
+               else {
+                       /* The offset does not fit into an immediate: reuse or adjust
+                          the constant cached in TMP_REG3, or load it from scratch. */
+                       if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
+                               if (argw != compiler->cache_argw) {
+                                       FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+                                       compiler->cache_argw = argw;
+                               }
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
+                       }
+                       else {
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
+                       }
+               }
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+       }
+
+       /* Case 3: (optional base register) + constant, no write back. */
+       /* The full address of this operand form is cached within immediate
+          distance: adjust TMP_REG3 if needed and access through it. */
+       if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
+               if (argw != compiler->cache_argw) {
+                       FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+                       compiler->cache_argw = argw;
+               }
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+       }
+
+       /* Otherwise bring the constant argw into TMP_REG3, either by adjusting
+          a nearby cached constant or by loading it. */
+       if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
+               if (argw != compiler->cache_argw)
+                       FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+       }
+       else {
+               compiler->cache_arg = SLJIT_MEM;
+               FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
+       }
+       compiler->cache_argw = argw;
+
+       /* Absolute address: TMP_REG3 is the address itself. */
+       if (!base)
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+
+       /* The next operand is close enough: keep the full address in TMP_REG3. */
+       if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) {
+               compiler->cache_arg = arg;
+               FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3)));
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+       }
+
+       /* Otherwise TMP_REG3 keeps the constant and the sum goes to tmp_ar. */
+       FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar));
+       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+}
+
+/* Performs a single, stand-alone memory access. The fast one instruction
+   form is tried first; otherwise the address cache is reset and the slow
+   path is used without any information about a following operand. */
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       if (!getput_arg_fast(compiler, flags, reg_ar, arg, argw)) {
+               /* Nothing was emitted: start from an empty cache. */
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+               return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
+       }
+
+       /* The fast path was taken, report its outcome. */
+       return compiler->error;
+}
+
+/* Performs a memory access on (arg1, arg1w) which is followed by an access
+   to (arg2, arg2w). The address cache is left untouched, so the slow path
+   may prepare a cached address for the second operand. */
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+
+       /* The fast path was taken, report its outcome. */
+       return compiler->error;
+}
+
+/* Generic operation emitter: brings src1 and src2 into registers (or an
+   immediate form), emits the operation through emit_single_op and stores
+   the result when dst is a memory operand. flags combines a data type
+   (MEM_MASK bits) with the control flags defined at the top of this
+   section. Returns SLJIT_SUCCESS or the compiler error code. */
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* arg1 goes to TMP_REG1 or src reg
+          arg2 goes to TMP_REG2, imm or src reg
+          TMP_REG3 can be used for caching
+          result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+       sljit_si dst_r = TMP_REG2;
+       sljit_si src1_r;
+       sljit_sw src2_r = 0;
+       sljit_si sugg_src2_r = TMP_REG2;
+
+       if (!(flags & ALT_KEEP_CACHE)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       /* Classify the destination. */
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               /* A move into nothing from a non-memory source emits no code. */
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+                       return SLJIT_SUCCESS;
+               if (GET_FLAGS(op))
+                       flags |= UNUSED_DEST;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               /* For moves the source can be loaded straight into dst. */
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r;
+       }
+       /* ARG_TEST: only asks whether the store can use the fast form. */
+       else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw))
+               flags |= SLOW_DEST;
+
+       /* Try to pass one operand as an instruction immediate: the signed
+          range for arithmetic operations, the UIMM_MAX range for logical ones. */
+       if (flags & IMM_OP) {
+               if ((src2 & SLJIT_IMM) && src2w) {
+                       if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN))
+                               || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) {
+                               flags |= SRC2_IMM;
+                               src2_r = src2w;
+                       }
+               }
+               if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
+                       if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN))
+                               || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) {
+                               flags |= SRC2_IMM;
+                               src2_r = src1w;
+
+                               /* And swap arguments. */
+                               src1 = src2;
+                               src1w = src2w;
+                               src2 = SLJIT_IMM;
+                               /* src2w = src2_r unneeded. */
+                       }
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1)) {
+               src1_r = src1;
+               flags |= REG1_SOURCE;
+       }
+       else if (src1 & SLJIT_IMM) {
+               if (src1w) {
+                       FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w));
+                       src1_r = TMP_REG1;
+               }
+               else
+                       /* A zero immediate is passed as register index 0. */
+                       src1_r = 0;
+       }
+       else {
+               /* Memory operand: load now if the fast form applies, otherwise
+                  postpone it to the slow path below. */
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+               src1_r = TMP_REG1;
+       }
+
+       /* Source 2. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               /* A move from a register to memory stores the source directly. */
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       dst_r = src2_r;
+       }
+       else if (src2 & SLJIT_IMM) {
+               if (!(flags & SRC2_IMM)) {
+                       if (src2w) {
+                               FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w));
+                               src2_r = sugg_src2_r;
+                       }
+                       else {
+                               src2_r = 0;
+                               /* Moving zero to memory stores register index 0 directly. */
+                               if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
+                                       dst_r = 0;
+                       }
+               }
+       }
+       else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+               src2_r = sugg_src2_r;
+       }
+
+       /* Perform the postponed loads. When both sources are slow, the load
+          order is chosen so that the address cache can be shared with the
+          operand accessed next (see can_cache). */
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               SLJIT_ASSERT(src2_r == TMP_REG2);
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw));
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       /* Store the result when the destination is in memory. */
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits an operation without explicit operands: breakpoint, nop, or a
+   multiply / divide of SLJIT_SCRATCH_REG1 by SLJIT_SCRATCH_REG2. For the
+   multiply and divide cases the LO result is moved to SLJIT_SCRATCH_REG1
+   and the HI result to SLJIT_SCRATCH_REG2.
+   Returns SLJIT_SUCCESS or the compiler error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       /* Saved before op is reduced to the plain opcode below. */
+       sljit_si int_op = op & SLJIT_INT_OP;
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               return push_inst(compiler, BREAK, UNMOVABLE_INS);
+       case SLJIT_NOP:
+               return push_inst(compiler, NOP, UNMOVABLE_INS);
+       case SLJIT_UMUL:
+       case SLJIT_SMUL:
+               /* The 64 bit build always uses the doubleword multiply here;
+                  int_op is not consulted for multiplication. */
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+               FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+#else
+               FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+#endif
+               FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1)));
+               return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2));
+       case SLJIT_UDIV:
+       case SLJIT_SDIV:
+               /* NOTE(review): two NOPs precede the divide unless
+                  SLJIT_MIPS_32_64 is set; presumably a hazard workaround for
+                  older cores - confirm. */
+#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+               FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif
+
+               /* On 64 bit targets SLJIT_INT_OP selects the 32 bit divide. */
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+               if (int_op)
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+               else
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+#else
+               FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+#endif
+
+               FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1)));
+               return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2));
+       }
+
+       /* Any other opcode emits nothing. */
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a single operand operation (moves, NOT, NEG, CLZ) by mapping it
+   onto emit_op. The source is always passed as the second operand; the
+   first operand is TMP_REG1, except for NEG which uses an immediate zero.
+   Returns SLJIT_SUCCESS or the compiler error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       /* flags is only ever modified on 64 bit targets, so the 32 bit build
+          replaces it with the constant 0 for the duration of this function. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define flags 0
+#else
+       sljit_si flags = 0;
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       /* 32 bit variants of the non-move operations use signed int data
+          and truncate an immediate source to 32 bits. */
+       if ((op & SLJIT_INT_OP) && GET_OPCODE(op) >= SLJIT_NOT) {
+               flags |= INT_DATA | SIGNED_DATA;
+               if (src & SLJIT_IMM)
+                       srcw = (sljit_si)srcw;
+       }
+#endif
+
+       /* In the move cases an immediate source is first converted to the
+          type being moved (the casts below); other sources are passed as is. */
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw);
+#endif
+
+       case SLJIT_MOV_SI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw);
+#endif
+
+       case SLJIT_MOV_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOV_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOV_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOV_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       /* The MOVU forms reuse the matching MOV operation and add WRITE_BACK. */
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+               return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw);
+#endif
+
+       case SLJIT_MOVU_SI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw);
+#endif
+
+       case SLJIT_MOVU_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOVU_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOVU_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOVU_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_NOT:
+               return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       /* Negation is emitted as a subtraction from an immediate zero. */
+       case SLJIT_NEG:
+               return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      undef flags
+#endif
+}
+/*
+ * Emits the two-source integer operation selected by GET_OPCODE(op).
+ * dst/dstw, src1/src1w and src2/src2w are forwarded to emit_op() together
+ * with a per-opcode set of flags (CUMULATIVE_OP, LOGICAL_OP, IMM_OP).
+ *
+ * On MIPS32 builds `flags` is the constant 0; otherwise it is a local that
+ * receives INT_DATA | SIGNED_DATA when op carries SLJIT_INT_OP.
+ *
+ * Returns the result of emit_op(), or SLJIT_SUCCESS when the opcode matches
+ * none of the cases below. CHECK_ERROR() is defined elsewhere; presumably it
+ * returns early when the compiler is already in an error state.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define flags 0
+#else
+       sljit_si flags = 0;
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       if (op & SLJIT_INT_OP) { /* int-sized operation on a 64-bit target */
+               flags |= INT_DATA | SIGNED_DATA;
+               if (src1 & SLJIT_IMM)
+                       src1w = (sljit_si)src1w; /* narrow the immediate to sljit_si */
+               if (src2 & SLJIT_IMM)
+                       src2w = (sljit_si)src2w; /* narrow the immediate to sljit_si */
+       }
+#endif
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+       case SLJIT_ADDC:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB:
+       case SLJIT_SUBC:
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_MUL:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+       case SLJIT_ASHR:
+               /* Immediate shift counts are masked: 0x1f on MIPS32 and for SLJIT_INT_OP, 0x3f otherwise. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               if (src2 & SLJIT_IMM)
+                       src2w &= 0x1f;
+#else
+               if (src2 & SLJIT_IMM) {
+                       if (op & SLJIT_INT_OP)
+                               src2w &= 0x1f;
+                       else
+                               src2w &= 0x3f;
+               }
+#endif
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      undef flags
+#endif
+}
+/*
+ * Returns the reg_map[] entry for the given sljit register, after running
+ * the check_sljit_get_register_index() validation hook on it.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       check_sljit_get_register_index(reg);
+       return reg_map[reg];
+}
+/*
+ * Returns the index used for the given sljit float register, which is
+ * reg << 1 (the same doubling applied to float register operands in the
+ * sljit_emit_fop* functions of this file).
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       check_sljit_get_float_register_index(reg);
+       return reg << 1;
+}
+/*
+ * Appends one caller-supplied machine instruction to the instruction stream.
+ * `instruction` is read as a single sljit_ins value and `size` is asserted
+ * to be 4. The instruction is pushed as UNMOVABLE_INS.
+ * Returns the result of push_inst().
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op_custom(compiler, instruction, size);
+       SLJIT_ASSERT(size == 4);
+
+       return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+/*
+ * Reports whether floating point support can be used.
+ * If SLJIT_IS_FPU_AVAILABLE is defined its value is returned directly.
+ * Otherwise, with GCC-compatible compilers, coprocessor 1 control register 0
+ * is read with `cfc1` (the FIR, going by the #error text below) and bit 22
+ * of that value is returned. Any other compiler is a build error.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#elif defined(__GNUC__)
+       sljit_sw fir;
+       asm ("cfc1 %0, $0" : "=r"(fir));
+       return (fir >> 22) & 0x1;
+#else
+#error "FIR check is not implemented for this architecture"
+#endif
+}
+/*
+ * FLOAT_DATA(op): DOUBLE_DATA, plus bit 0x2 when op carries SLJIT_SINGLE_OP
+ * (0x100 >> 7; sljit_emit_fop1 statically asserts SLJIT_SINGLE_OP == 0x100
+ * and that DOUBLE_DATA does not already use bit 0x2).
+ * FMT(op): bit 21 (0x100 << 13) set when SLJIT_SINGLE_OP is absent from op
+ * and clear when it is present; the result is OR-ed into FPU opcodes.
+ */
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7))
+#define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8))
+/*
+ * Emits a one-source floating point operation selected by GET_OPCODE(op).
+ *
+ * SLJIT_CMPD: both operands are brought into float registers (memory
+ * operands are loaded into TMP_FREG1 / TMP_FREG2), then compare
+ * instructions are emitted. For SLJIT_SET_E and SLJIT_SET_S the value read
+ * from FCSR_REG with CFC1 is shifted right by 23 and masked with 1, and the
+ * result is stored in EQUAL_FLAG / ULESS_FLAG / UGREATER_FLAG. The final
+ * C_UN_fmt compare is always emitted.
+ *
+ * SLJIT_MOVD / SLJIT_NEGD / SLJIT_ABSD: the source is loaded if it is in
+ * memory, the operation is emitted into dst_fr, and the result is stored
+ * with emit_op_mem2() when dst is not a register (dst_fr == TMP_FREG1).
+ *
+ * Returns SLJIT_SUCCESS or the result of the last emitted instruction;
+ * FAIL_IF() is defined elsewhere and presumably returns early on error.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_fr;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       if (GET_OPCODE(op) == SLJIT_CMPD) {
+               if (dst & SLJIT_MEM) {
+                       FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
+                       dst = TMP_FREG1;
+               }
+               else
+                       dst <<= 1; /* register operand: scale to the float register index */
+
+               if (src & SLJIT_MEM) {
+                       FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
+                       src = TMP_FREG2;
+               }
+               else
+                       src <<= 1; /* register operand: scale to the float register index */
+
+               /* src and dst are swapped. */
+               if (op & SLJIT_SET_E) {
+                       FAIL_IF(push_inst(compiler, C_UEQ_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS));
+                       FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
+                       FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
+                       FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
+               }
+               if (op & SLJIT_SET_S) {
+                       /* Mixing the instructions for the two checks. */
+                       FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS));
+                       FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
+                       FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(dst) | FS(src), UNMOVABLE_INS));
+                       FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
+                       FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
+                       FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
+                       FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
+                       FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
+               }
+               return push_inst(compiler, C_UN_fmt | FMT(op) | FT(src) | FS(dst), FCSR_FCC);
+       }
+
+       /* Work in dst's own float register when dst is a register, otherwise in TMP_FREG1. */
+       dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
+               src = dst_fr;
+       }
+       else
+               src <<= 1;
+
+       switch (GET_OPCODE(op)) {
+               case SLJIT_MOVD:
+                       /* A register move is only emitted for a register destination that differs from the source. */
+                       if (src != dst_fr && dst_fr != TMP_FREG1)
+                               FAIL_IF(push_inst(compiler, MOV_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
+                       break;
+               case SLJIT_NEGD:
+                       FAIL_IF(push_inst(compiler, NEG_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
+                       break;
+               case SLJIT_ABSD:
+                       FAIL_IF(push_inst(compiler, ABS_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
+                       break;
+       }
+
+       if (dst_fr == TMP_FREG1) {
+               if (GET_OPCODE(op) == SLJIT_MOVD)
+                       dst_fr = src; /* plain move: store straight from the source register */
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
+       }
+
+       return SLJIT_SUCCESS;
+}
+/*
+ * Emits a two-source floating point operation (SLJIT_ADDD, SLJIT_SUBD,
+ * SLJIT_MULD or SLJIT_DIVD, selected by GET_OPCODE(op)).
+ *
+ * Memory sources are first tried with getput_arg_fast(); those it does not
+ * handle are flagged SLOW_SRC1 / SLOW_SRC2 and loaded afterwards with
+ * getput_arg() into TMP_FREG1 / TMP_FREG2. The result is computed into the
+ * float register of dst, or into TMP_FREG2 when dst is not a register, in
+ * which case it is then stored with emit_op_mem2().
+ *
+ * Returns SLJIT_SUCCESS; FAIL_IF() is defined elsewhere and presumably
+ * returns early when an emit step fails.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_fr, flags = 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+                       FAIL_IF(compiler->error);
+                       src1 = TMP_FREG1;
+               } else
+                       flags |= SLOW_SRC1;
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+                       FAIL_IF(compiler->error);
+                       src2 = TMP_FREG2;
+               } else
+                       flags |= SLOW_SRC2;
+       }
+       else
+               src2 <<= 1;
+
+       /* When both sources need the slow path, the load order is chosen from the can_cache() results. */
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+       /* From here on the slow-path sources live in the temporary float registers. */
+       if (flags & SLOW_SRC1)
+               src1 = TMP_FREG1;
+       if (flags & SLOW_SRC2)
+               src2 = TMP_FREG2;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADDD:
+               FAIL_IF(push_inst(compiler, ADD_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+               break;
+
+       case SLJIT_SUBD:
+               FAIL_IF(push_inst(compiler, SUB_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+               break;
+
+       case SLJIT_MULD:
+               FAIL_IF(push_inst(compiler, MUL_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+               break;
+
+       case SLJIT_DIVD:
+               FAIL_IF(push_inst(compiler, DIV_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+               break;
+       }
+
+       /* dst was not a register: write the temporary result out to it. */
+       if (dst_fr == TMP_FREG2)
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+/*
+ * Emits code that copies RETURN_ADDR_REG into dst.
+ * Nothing is emitted for SLJIT_UNUSED. A register destination gets a single
+ * ADDU_W with TA(0) as the second operand; any other destination is written
+ * with emit_op_mem() as WORD_DATA.
+ * Returns SLJIT_SUCCESS or the result of the emitting call.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
+
+       /* Memory. */
+       return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
+}
+/*
+ * Emits code that places src into RETURN_ADDR_REG (register copy, memory
+ * load or immediate load, depending on the kind of src) and then jumps to
+ * it with JR. The JR is followed by a NOP; presumably this fills the
+ * branch delay slot.
+ * Returns the result of the final push_inst().
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG));
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, RETURN_ADDR_REG, srcw));
+
+       FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
+       return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+/*
+ * Returns a label for the current position in the instruction stream.
+ * If the most recent label was created at the same compiler->size it is
+ * returned again; otherwise a new sljit_label is allocated with
+ * ensure_abuf() and initialised with set_label().
+ * Creating a new label also sets compiler->delay_slot to UNMOVABLE_INS.
+ * PTR_FAIL_IF() is defined elsewhere; presumably it returns NULL when the
+ * allocation fails.
+ */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       compiler->delay_slot = UNMOVABLE_INS;
+       return label;
+}
+/*
+ * JUMP_LENGTH is OR-ed into the conditional branch opcodes below as their
+ * immediate field: 4 on MIPS32 and 8 otherwise. NOTE(review): this looks
+ * like the number of instructions to skip over the absolute jump sequence
+ * that follows each branch; confirm against emit_const().
+ */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define JUMP_LENGTH    4
+#else
+#define JUMP_LENGTH    8
+#endif
+/*
+ * The BR_* helpers assign the locals `inst`, `flags` and `delay_check`,
+ * which must exist in the expanding function. BR_Z / BR_NZ build a
+ * BEQ / BNE of `src` against TA(0); BR_T / BR_F build BC1T / BC1F and use
+ * FCSR_FCC as the delay check.
+ */
+#define BR_Z(src) \
+       inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \
+       flags = IS_BIT26_COND; \
+       delay_check = src;
+
+#define BR_NZ(src) \
+       inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \
+       flags = IS_BIT26_COND; \
+       delay_check = src;
+
+#define BR_T() \
+       inst = BC1T | JUMP_LENGTH; \
+       flags = IS_BIT16_COND; \
+       delay_check = FCSR_FCC;
+
+#define BR_F() \
+       inst = BC1F | JUMP_LENGTH; \
+       flags = IS_BIT16_COND; \
+       delay_check = FCSR_FCC;
+/*
+ * Emits a jump or call whose target is filled in later, and returns the
+ * sljit_jump record describing it.
+ *
+ * For conditional types a branch on the matching flag register is emitted
+ * first, using the BR_* macros; `inst` stays 0 for unconditional types.
+ * NOTE(review): the branch sense is the opposite of the requested condition
+ * (e.g. SLJIT_C_EQUAL uses BR_NZ), which looks like it skips the absolute
+ * jump that follows when the condition does not hold; confirm against the
+ * code that resolves jump->addr.
+ *
+ * The target placeholder is loaded into TMP_REG2 with emit_const(). Types
+ * up to SLJIT_JUMP then use JR followed by NOP; all other types use JALR
+ * followed by an ADDU_W from SLJIT_SCRATCH_REG1 to register 4.
+ * jump->addr records compiler->size of the instruction after the JR / JALR.
+ */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins inst;
+       sljit_si flags = 0;
+       sljit_si delay_check = UNMOVABLE_INS;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff; /* keep only the low 8 bits, which the switch below compares against */
+
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               BR_NZ(EQUAL_FLAG);
+               break;
+       case SLJIT_C_NOT_EQUAL:
+       case SLJIT_C_FLOAT_EQUAL:
+               BR_Z(EQUAL_FLAG);
+               break;
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               BR_Z(ULESS_FLAG);
+               break;
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               BR_NZ(ULESS_FLAG);
+               break;
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               BR_Z(UGREATER_FLAG);
+               break;
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               BR_NZ(UGREATER_FLAG);
+               break;
+       case SLJIT_C_SIG_LESS:
+               BR_Z(LESS_FLAG);
+               break;
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               BR_NZ(LESS_FLAG);
+               break;
+       case SLJIT_C_SIG_GREATER:
+               BR_Z(GREATER_FLAG);
+               break;
+       case SLJIT_C_SIG_LESS_EQUAL:
+               BR_NZ(GREATER_FLAG);
+               break;
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_MUL_OVERFLOW:
+               BR_Z(OVERFLOW_FLAG);
+               break;
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               BR_NZ(OVERFLOW_FLAG);
+               break;
+       case SLJIT_C_FLOAT_UNORDERED:
+               BR_F();
+               break;
+       case SLJIT_C_FLOAT_ORDERED:
+               BR_T();
+               break;
+       default:
+               /* Not conditional branch. */
+               inst = 0;
+               break;
+       }
+
+       jump->flags |= flags;
+       /* IS_MOVABLE is set unless the previous instruction is unmovable or wrote the register checked by the branch. */
+       if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check))
+               jump->flags |= IS_MOVABLE;
+
+       if (inst)
+               PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS));
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       if (type <= SLJIT_JUMP) {
+               PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+               jump->addr = compiler->size;
+               PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       } else {
+               SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+               /* Cannot be optimized out if type is >= CALL0. */
+               jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? IS_CALL : 0);
+               PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+               jump->addr = compiler->size;
+               /* A NOP if type < CALL1. */
+               PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS));
+       }
+       return jump;
+}
+/*
+ * RESOLVE_IMM1() / RESOLVE_IMM2() turn an immediate operand of
+ * sljit_emit_cmp() into a register operand: a non-zero immediate is loaded
+ * into TMP_REG1 / TMP_REG2 with load_immediate(), and a zero immediate is
+ * replaced by operand 0. They use the src1/src1w and src2/src2w variables of
+ * the expanding function and leave it through PTR_FAIL_IF() on failure.
+ */
+#define RESOLVE_IMM1() \
+       if (src1 & SLJIT_IMM) { \
+               if (src1w) { \
+                       PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \
+                       src1 = TMP_REG1; \
+               } \
+               else \
+                       src1 = 0; \
+       }
+
+#define RESOLVE_IMM2() \
+       if (src2 & SLJIT_IMM) { \
+               if (src2w) { \
+                       PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \
+                       src2 = TMP_REG2; \
+               } \
+               else \
+                       src2 = 0; \
+       }
+/*
+ * Emits a fused integer compare-and-jump and returns its sljit_jump record.
+ *
+ * Memory operands are loaded into TMP_REG1 / TMP_REG2 first. Three shapes
+ * are then generated, depending on `type` (after masking with 0xff):
+ *   - types up to SLJIT_C_NOT_EQUAL: a single BNE / BEQ on src1 and src2;
+ *   - signed comparisons against an immediate zero: one of BLEZ / BGTZ /
+ *     BGEZ / BLTZ on the remaining operand;
+ *   - everything else: SLT / SLTU (or SLTI / SLTIU when the immediate lies
+ *     within SIMM_MIN..SIMM_MAX) into TMP_REG1, then BNE / BEQ of TMP_REG1
+ *     against TA(0).
+ * As in sljit_emit_jump(), each branch uses the opposite sense of the
+ * requested condition and is followed by emit_const() into TMP_REG2,
+ * JR TMP_REG2 and a NOP. jump->addr is the compiler->size of that NOP.
+ */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       struct sljit_jump *jump;
+       sljit_si flags;
+       sljit_ins inst;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA;
+       if (src1 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w));
+               src1 = TMP_REG1;
+       }
+       if (src2 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0));
+               src2 = TMP_REG2;
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       if (type <= SLJIT_C_NOT_EQUAL) {
+               RESOLVE_IMM1();
+               RESOLVE_IMM2();
+               jump->flags |= IS_BIT26_COND;
+               if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2)))
+                       jump->flags |= IS_MOVABLE;
+               PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS));
+       }
+       else if (type >= SLJIT_C_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) {
+               inst = NOP;
+               if ((src1 & SLJIT_IMM) && (src1w == 0)) {
+                       /* Zero is on the left: "0 <cond> src2" is tested on src2. */
+                       RESOLVE_IMM2();
+                       switch (type) {
+                       case SLJIT_C_SIG_LESS:
+                               inst = BLEZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       case SLJIT_C_SIG_GREATER_EQUAL:
+                               inst = BGTZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       case SLJIT_C_SIG_GREATER:
+                               inst = BGEZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       case SLJIT_C_SIG_LESS_EQUAL:
+                               inst = BLTZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       }
+                       src1 = src2; /* the branch below always encodes src1 */
+               }
+               else {
+                       /* Zero is on the right: "src1 <cond> 0" is tested on src1. */
+                       RESOLVE_IMM1();
+                       switch (type) {
+                       case SLJIT_C_SIG_LESS:
+                               inst = BGEZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       case SLJIT_C_SIG_GREATER_EQUAL:
+                               inst = BLTZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       case SLJIT_C_SIG_GREATER:
+                               inst = BLEZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       case SLJIT_C_SIG_LESS_EQUAL:
+                               inst = BGTZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       }
+               }
+               PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS));
+       }
+       else {
+               if (type == SLJIT_C_LESS || type == SLJIT_C_GREATER_EQUAL || type == SLJIT_C_SIG_LESS || type == SLJIT_C_SIG_GREATER_EQUAL) {
+                       /* TMP_REG1 = (src1 < src2); types up to SLJIT_C_LESS_EQUAL use the unsigned set-less-than forms. */
+                       RESOLVE_IMM1();
+                       if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN)
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1)));
+                       else {
+                               RESOLVE_IMM2();
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1)));
+                       }
+                       type = (type == SLJIT_C_LESS || type == SLJIT_C_SIG_LESS) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
+               }
+               else {
+                       /* Remaining types: same scheme with the operands exchanged, TMP_REG1 = (src2 < src1). */
+                       RESOLVE_IMM2();
+                       if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN)
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1)));
+                       else {
+                               RESOLVE_IMM1();
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1)));
+                       }
+                       type = (type == SLJIT_C_GREATER || type == SLJIT_C_SIG_GREATER) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
+               }
+
+               jump->flags |= IS_BIT26_COND;
+               PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS));
+       }
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       return jump;
+}
+
+#undef RESOLVE_IMM1
+#undef RESOLVE_IMM2
+/*
+ * Emits a fused floating point compare-and-jump and returns its sljit_jump
+ * record.
+ *
+ * Memory operands are loaded into TMP_FREG1 / TMP_FREG2; register operands
+ * are scaled with << 1. The low 8 bits of `type` select a compare opcode
+ * (C_UEQ_fmt, C_ULT_fmt, C_ULE_fmt or C_UN_fmt) and whether the jump is
+ * taken when the compare is true (`if_true`). The conditional branch that
+ * follows uses the opposite opcode (BC1F when if_true, else BC1T), then
+ * emit_const() into TMP_REG2, JR TMP_REG2 and a NOP are emitted.
+ * jump->addr is the compiler->size of that NOP.
+ */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       struct sljit_jump *jump;
+       sljit_ins inst;
+       sljit_si if_true;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       if (src1 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+               src1 = TMP_FREG1;
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+               src2 = TMP_FREG2;
+       }
+       else
+               src2 <<= 1;
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       jump->flags |= IS_BIT16_COND;
+
+       switch (type & 0xff) {
+       case SLJIT_C_FLOAT_EQUAL:
+               inst = C_UEQ_fmt;
+               if_true = 1;
+               break;
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               inst = C_UEQ_fmt;
+               if_true = 0;
+               break;
+       case SLJIT_C_FLOAT_LESS:
+               inst = C_ULT_fmt;
+               if_true = 1;
+               break;
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               inst = C_ULT_fmt;
+               if_true = 0;
+               break;
+       case SLJIT_C_FLOAT_GREATER:
+               inst = C_ULE_fmt;
+               if_true = 0;
+               break;
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               inst = C_ULE_fmt;
+               if_true = 1;
+               break;
+       case SLJIT_C_FLOAT_UNORDERED:
+               inst = C_UN_fmt;
+               if_true = 1;
+               break;
+       case SLJIT_C_FLOAT_ORDERED:
+       default: /* Make compilers happy. */
+               inst = C_UN_fmt;
+               if_true = 0;
+               break;
+       }
+
+       PTR_FAIL_IF(push_inst(compiler, inst | FMT(type) | FT(src2) | FS(src1), UNMOVABLE_INS));
+       /* Intentionally the other opcode. */
+       PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS));
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       return jump;
+}
+
+#undef JUMP_LENGTH
+#undef BR_Z
+#undef BR_NZ
+#undef BR_T
+#undef BR_F
+
+#undef FLOAT_DATA
+#undef FMT
+/*
+ * Emits an indirect jump or call to the address described by src/srcw.
+ *
+ * A register source whose machine register is number 4 is first copied to
+ * TMP_REG2, since register 4 is overwritten by the argument move emitted
+ * for calls below.
+ *
+ * type >= SLJIT_CALL0: immediate and memory targets are loaded into
+ * PIC_ADDR_REG (asserted to be TMP_REG2, machine register 25) and called
+ * with JALR, followed by the move of SLJIT_SCRATCH_REG1 into register 4.
+ * Register targets emit that move before the JALR (only for
+ * type >= SLJIT_CALL1) and copy the target into PIC_ADDR_REG in the
+ * instruction after the JALR.
+ *
+ * Other types: an immediate target is recorded as a sljit_jump with
+ * JUMP_ADDR set and a placeholder constant in TMP_REG2; a memory target is
+ * loaded into TMP_REG2. A JR followed by a NOP is then emitted.
+ *
+ * Returns SLJIT_SUCCESS or the result of the last emitting call.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       sljit_si src_r = TMP_REG2;
+       struct sljit_jump *jump = NULL;
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src)) {
+               if (DR(src) != 4)
+                       src_r = src;
+               else
+                       FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+       }
+
+       if (type >= SLJIT_CALL0) {
+               SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+               if (src & (SLJIT_IMM | SLJIT_MEM)) {
+                       if (src & SLJIT_IMM)
+                               FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
+                       else {
+                               SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
+                               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+                       }
+                       FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+                       /* We need an extra instruction in any case. */
+                       return push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS);
+               }
+
+               /* Register input. */
+               if (type >= SLJIT_CALL1)
+                       FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), 4));
+               FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+               return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS);
+       }
+
+       if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
+               jump->u.target = srcw;
+
+               if (compiler->delay_slot != UNMOVABLE_INS)
+                       jump->flags |= IS_MOVABLE;
+
+               FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       }
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+
+       FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS));
+       if (jump)
+               jump->addr = compiler->size; /* only immediate targets have a jump record to fill in */
+       FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       return SLJIT_SUCCESS;
+}
+/*
+ * Materialises the condition `type` as a value and either stores it in dst
+ * (opcodes below SLJIT_ADD) or combines it with src using `op` via
+ * emit_op() (opcodes from SLJIT_ADD upwards).
+ *
+ * The value is taken from the flag register that belongs to `type`
+ * (EQUAL_FLAG, ULESS_FLAG, UGREATER_FLAG, LESS_FLAG, GREATER_FLAG,
+ * OVERFLOW_FLAG) or, for the float ordered/unordered types, extracted from
+ * FCSR_REG. When bit 0 of `type` is set the value is inverted with XORI 1.
+ *
+ * On MIPS32 `mem_type` is the constant WORD_DATA; otherwise it is
+ * INT_DATA | SIGNED_DATA for SLJIT_INT_OP and for SLJIT_MOV_SI /
+ * SLJIT_MOV_UI, and WORD_DATA in all other cases.
+ *
+ * Nothing is emitted when dst is SLJIT_UNUSED.
+ */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si sugg_dst_ar, dst_ar;
+       sljit_si flags = GET_ALL_FLAGS(op);
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define mem_type WORD_DATA
+#else
+       sljit_si mem_type = (op & SLJIT_INT_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       if (op == SLJIT_MOV_SI || op == SLJIT_MOV_UI)
+               mem_type = INT_DATA | SIGNED_DATA;
+#endif
+       /* Preferred result register: dst itself for a register destination of an opcode below SLJIT_ADD, else TMP_REG2. */
+       sugg_dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
+               ADJUST_LOCAL_OFFSET(src, srcw);
+               FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_NOT_EQUAL:
+               FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+               break;
+       case SLJIT_C_LESS:
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_LESS:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               dst_ar = ULESS_FLAG;
+               break;
+       case SLJIT_C_GREATER:
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_GREATER:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               dst_ar = UGREATER_FLAG;
+               break;
+       case SLJIT_C_SIG_LESS:
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               dst_ar = LESS_FLAG;
+               break;
+       case SLJIT_C_SIG_GREATER:
+       case SLJIT_C_SIG_LESS_EQUAL:
+               dst_ar = GREATER_FLAG;
+               break;
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_NOT_OVERFLOW:
+               dst_ar = OVERFLOW_FLAG;
+               break;
+       case SLJIT_C_MUL_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+               type ^= 0x1; /* Flip type bit for the XORI below. */
+               break;
+       case SLJIT_C_FLOAT_EQUAL:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               dst_ar = EQUAL_FLAG;
+               break;
+
+       case SLJIT_C_FLOAT_UNORDERED:
+       case SLJIT_C_FLOAT_ORDERED:
+               FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
+               FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar));
+               FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+               break;
+
+       default:
+               SLJIT_ASSERT_STOP();
+               dst_ar = sugg_dst_ar;
+               break;
+       }
+
+       if (type & 0x1) { /* odd type values: invert the 0/1 value */
+               FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+       }
+
+       if (op >= SLJIT_ADD) {
+               /* The flag value becomes the second source (TMP_REG2) of the requested operation. */
+               if (DR(TMP_REG2) != dst_ar)
+                       FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+               return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);
+
+       if (sugg_dst_ar != dst_ar)
+               return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar);
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      undef mem_type
+#endif
+}
+/*
+ * Emits code that loads init_value into dst and returns the sljit_const
+ * record created for it with ensure_abuf() and set_const().
+ * The constant is loaded with emit_const() into dst itself when
+ * SLOW_IS_REG(dst) holds, otherwise into TMP_REG2; a memory dst is then
+ * written from TMP_REG2 with an SLJIT_MOV.
+ * PTR_FAIL_IF() is defined elsewhere; presumably it returns NULL on failure.
+ */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+       return const_;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativePPC_32.c b/ext/pcre/pcrelib/sljit/sljitNativePPC_32.c
new file mode 100644 (file)
index 0000000..b14b75c
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ppc 32-bit arch dependent functions. */
+
+/* Loads the constant imm into reg with at most two instructions.
+   Returns the result of push_inst (SLJIT_SUCCESS when nothing further
+   has to be emitted). */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       /* Value lies within [SIMM_MIN, SIMM_MAX]: a single ADDI. */
+       if (imm >= SIMM_MIN && imm <= SIMM_MAX)
+               return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
+
+       /* No bits set above the low halfword: a single ORI with TMP_ZERO as source. */
+       if ((imm & ~0xffff) == 0)
+               return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm));
+
+       /* Upper halfword first; the lower halfword is or-ed in only when non-zero. */
+       FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
+       if ((imm & 0xffff) == 0)
+               return SLJIT_SUCCESS;
+       return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
+}
+
+#define INS_CLEAR_LEFT(dst, src, from) /* RLWINM word with a zero shift field, 'from' in the field at bit 6 and 31 in the field at bit 1 */ \
+       (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1))
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_si src2)
+{      /* Emits the instruction(s) for one sljit operation `op` (32-bit PPC).
+        * dst is the result operand, src1/src2 are the source operands; the
+        * unary operations (moves, NOT, NEG, CLZ) only use src2 and assert
+        * that src1 == TMP_REG1.
+        * `flags` selects the encoding: the ALT_FORM* bits pick the immediate
+        * or compare variants handled below (immediates are taken from
+        * compiler->imm), ALT_SET_FLAGS picks the ADDC / SUBFC forms, and
+        * RC(flags) / OERC(flags) contribute instruction bits derived from flags.
+        * Returns the result of the last push_inst, or SLJIT_SUCCESS when no
+        * instruction is needed; FAIL_IF presumably returns early on a
+        * push_inst failure. An opcode without a case reaches SLJIT_ASSERT_STOP().
+        */
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if (dst != src2)
+                       return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB)
+                               return push_inst(compiler, EXTSB | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
+               }
+               else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
+                       return push_inst(compiler, EXTSB | S(src2) | A(dst));
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH)
+                               return push_inst(compiler, EXTSH | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
+               }
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+
+       case SLJIT_NEG:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+
+       case SLJIT_ADD:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM4) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
+                       return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
+               return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_ADDC:
+               if (flags & ALT_FORM1) { /* ALT_FORM1: the ADDE is bracketed by MFXER ... MTXER on operand 0 */
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_SUB:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & (ALT_FORM2 | ALT_FORM3)) { /* compare-with-immediate only: no result is written to dst */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2)
+                               FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm));
+                       if (flags & ALT_FORM3)
+                               return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm);
+                       return SLJIT_SUCCESS;
+               }
+               if (flags & (ALT_FORM4 | ALT_FORM5)) { /* register compare only: no result is written to dst */
+                       if (flags & ALT_FORM4)
+                               FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
+                       if (flags & ALT_FORM5)
+                               FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)));
+                       return SLJIT_SUCCESS;
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+               if (flags & ALT_FORM6)
+                       FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
+               return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_SUBC:
+               if (flags & ALT_FORM1) { /* ALT_FORM1: the SUBFE is bracketed by MFXER ... MTXER on operand 0 */
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_MUL:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
+               }
+               return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_AND:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm);
+               }
+               return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_OR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) { /* two-instruction form: low halfword of compiler->imm, then compiler->imm >> 16 */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_XOR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) { /* two-instruction form: low halfword of compiler->imm, then compiler->imm >> 16 */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_SHL:
+               if (flags & ALT_FORM1) { /* immediate shift count, reduced modulo 32 */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       compiler->imm &= 0x1f;
+                       return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
+               }
+               return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_LSHR:
+               if (flags & ALT_FORM1) { /* immediate shift count, reduced modulo 32 */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       compiler->imm &= 0x1f;
+                       return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
+               }
+               return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_ASHR:
+               if (flags & ALT_FORM3) /* ALT_FORM3: MFXER here, matching MTXER after the shift */
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       compiler->imm &= 0x1f;
+                       FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
+               }
+               else
+                       FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)));
+               return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+/* Emits the fixed two-instruction load (ADDIS for the upper halfword, ORI for
+   the lower halfword) of init_value into reg. Both instructions are always
+   emitted, even when a halfword is zero. */
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value)
+{
+       sljit_ins load_upper = ADDIS | D(reg) | A(0) | IMM(init_value >> 16);
+       sljit_ins or_lower = ORI | S(reg) | A(reg) | IMM(init_value);
+
+       FAIL_IF(push_inst(compiler, load_upper));
+       return push_inst(compiler, or_lower);
+}
+
+/* Rewrites the low 16 bits of the two instruction words at addr with the
+   upper and lower halfwords of new_addr, then flushes that range. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *patch = (sljit_ins*)addr;
+       sljit_uw upper_half = (new_addr >> 16) & 0xffff;
+       sljit_uw lower_half = new_addr & 0xffff;
+
+       /* Keep the top 16 bits of each instruction word, swap in the new immediate. */
+       patch[0] = (patch[0] & 0xffff0000) | upper_half;
+       patch[1] = (patch[1] & 0xffff0000) | lower_half;
+       SLJIT_CACHE_FLUSH(patch, patch + 2);
+}
+
+/* Rewrites the low 16 bits of the two instruction words at addr with the
+   upper and lower halfwords of new_constant, then flushes that range. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *patch = (sljit_ins*)addr;
+       sljit_sw upper_half = (new_constant >> 16) & 0xffff;
+       sljit_sw lower_half = new_constant & 0xffff;
+
+       /* Keep the top 16 bits of each instruction word, swap in the new immediate. */
+       patch[0] = (patch[0] & 0xffff0000) | upper_half;
+       patch[1] = (patch[1] & 0xffff0000) | lower_half;
+       SLJIT_CACHE_FLUSH(patch, patch + 2);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativePPC_64.c b/ext/pcre/pcrelib/sljit/sljitNativePPC_64.c
new file mode 100644 (file)
index 0000000..182ac7b
--- /dev/null
@@ -0,0 +1,421 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ppc 64-bit arch dependent functions. */
+
+#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) /* GCC-style inline assembly is available */
+#define ASM_SLJIT_CLZ(src, dst) /* executes cntlzd at compile-host run time: dst = leading-zero count of src */ \
+       __asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) )
+#elif defined(__xlc__)
+#error "Please enable GCC syntax for inline assembly statements"
+#else
+#error "Must implement count leading zeroes"
+#endif /* ASM_SLJIT_CLZ selection */
+
+#define RLDI(dst, src, sh, mb, type) /* HI(30) instruction word: sh and mb are 6-bit values, each split into a 5-bit field plus one separate high bit; type is placed at bit 2 */ \
+       (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
+
+/* Pushes an RLDI(..., type 1) instruction on reg with sh = 63 - shift and
+   mb = shift. Uses the `compiler` variable of the enclosing function.
+   The shift argument is parenthesized so that expression arguments
+   (e.g. a + b) expand correctly inside the subtraction. */
+#define PUSH_RLDICR(reg, shift) \
+       push_inst(compiler, RLDI(reg, reg, 63 - (shift), shift, 1))
+
+/* Loads the 64-bit constant imm into reg, trying progressively longer
+   instruction sequences: one instruction for small values, up to the
+   five-instruction general form at the end.
+   Returns the result of the last push_inst, or SLJIT_SUCCESS when a trailing
+   ORI can be omitted; FAIL_IF presumably returns early on a push_inst failure. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       sljit_uw tmp;
+       sljit_uw shift;
+       sljit_uw tmp2;
+       sljit_uw shift2;
+
+       /* Value lies within [SIMM_MIN, SIMM_MAX]: a single ADDI. */
+       if (imm <= SIMM_MAX && imm >= SIMM_MIN)
+               return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
+
+       /* No bits set above the low halfword: a single ORI with TMP_ZERO as source. */
+       if (!(imm & ~0xffff))
+               return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm));
+
+       /* Fits in a signed 32-bit value: ADDIS plus an optional ORI. */
+       if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
+               FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
+               return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
+       }
+
+       /* Count leading zeroes (of ~imm for negative values, i.e. the leading ones). */
+       tmp = (imm >= 0) ? imm : ~imm;
+       ASM_SLJIT_CLZ(tmp, shift);
+       SLJIT_ASSERT(shift > 0);
+       shift--;
+       /* Shift as unsigned: left-shifting a negative signed value is undefined behavior in C. */
+       tmp = ((sljit_uw)imm << shift);
+
+       /* Only the top 16 bits of the normalized value are set. */
+       if ((tmp & ~0xffff000000000000ul) == 0) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               shift += 15;
+               return PUSH_RLDICR(reg, shift);
+       }
+
+       /* Only the top 32 bits of the normalized value are set. */
+       if ((tmp & ~0xffffffff00000000ul) == 0) {
+               FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48)));
+               FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
+               shift += 31;
+               return PUSH_RLDICR(reg, shift);
+       }
+
+       /* Cut the leading 16 bits out of the immediate; tmp2 holds the remaining low bits. */
+       shift += 15;
+       tmp2 = imm & ((1ul << (63 - shift)) - 1);
+
+       /* Remainder fits in 16 bits: or it in after the rotate. */
+       if (tmp2 <= 0xffff) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               FAIL_IF(PUSH_RLDICR(reg, shift));
+               return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2);
+       }
+
+       /* Remainder fits in 32 bits: ORIS plus an optional ORI. */
+       if (tmp2 <= 0xffffffff) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               FAIL_IF(PUSH_RLDICR(reg, shift));
+               FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16)));
+               return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
+       }
+
+       /* Normalize the remainder as well and check whether it is a single 16-bit group. */
+       ASM_SLJIT_CLZ(tmp2, shift2);
+       tmp2 <<= shift2;
+
+       if ((tmp2 & ~0xffff000000000000ul) == 0) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               shift2 += 15;
+               shift += (63 - shift2);
+               FAIL_IF(PUSH_RLDICR(reg, shift));
+               FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48)));
+               return PUSH_RLDICR(reg, shift2);
+       }
+
+       /* The general version: four 16-bit pieces with a rotate in the middle. */
+       FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48)));
+       FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
+       FAIL_IF(PUSH_RLDICR(reg, 31));
+       FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
+       return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
+}
+
+/* Simplified mnemonics: clrldi.
+   Builds an RLDICL word with 'from' in the field at bit 6 and bit 5 set;
+   in the RLDI macro of this file bit 5 carries the 0x20 part of mb, so the
+   encoded mask-begin value is presumably 32 + from (confirm against the
+   RLDICL definition). */
+#define INS_CLEAR_LEFT(dst, src, from) \
+       (RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5))
+
+/* Sign extension for integer operations.
+   These macros expand inside emit_single_op and use its names directly
+   (compiler, flags, src1, src2). When ALT_SIGN_EXT is set, every source that
+   is flagged as a register (REG1_SOURCE / REG2_SOURCE) is sign-extended with
+   EXTSW into TMP_REG1 / TMP_REG2 and src1 / src2 is redirected to that
+   temporary. UN_EXTS handles src2 only, BIN_EXTS handles both sources and
+   BIN_IMM_EXTS handles src1 only. Each macro expands to a bare `if`
+   statement (there is no do/while wrapper). */
+#define UN_EXTS() \
+       if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \
+               FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \
+               src2 = TMP_REG2; \
+       }
+
+#define BIN_EXTS() \
+       if (flags & ALT_SIGN_EXT) { \
+               if (flags & REG1_SOURCE) { \
+                       FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \
+                       src1 = TMP_REG1; \
+               } \
+               if (flags & REG2_SOURCE) { \
+                       FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \
+                       src2 = TMP_REG2; \
+               } \
+       }
+
+#define BIN_IMM_EXTS() \
+       if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \
+               FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \
+               src1 = TMP_REG1; \
+       }
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_si src2)
+{      /* Emits the instruction(s) for one sljit operation `op` (64-bit PPC).
+        * dst is the result operand, src1/src2 are the source operands; the
+        * unary operations (moves, NOT, NEG, CLZ) only use src2 and assert
+        * that src1 == TMP_REG1.
+        * `flags` selects the encoding: the ALT_FORM* bits pick the immediate,
+        * compare or 32-bit word variants handled below (immediates are taken
+        * from compiler->imm), ALT_SET_FLAGS picks the ADDC / SUBFC forms,
+        * ALT_SIGN_EXT triggers the UN_EXTS / BIN_EXTS / BIN_IMM_EXTS
+        * sign-extension macros, and RC(flags) / OERC(flags) contribute
+        * instruction bits derived from flags.
+        * Returns the result of the last push_inst, or SLJIT_SUCCESS when no
+        * instruction is needed; FAIL_IF presumably returns early on a
+        * push_inst failure. An opcode without a case reaches SLJIT_ASSERT_STOP().
+        */
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if (dst != src2)
+                       return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SI)
+                               return push_inst(compiler, EXTSW | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
+               }
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB)
+                               return push_inst(compiler, EXTSB | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
+               }
+               else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
+                       return push_inst(compiler, EXTSB | S(src2) | A(dst));
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH)
+                               return push_inst(compiler, EXTSH | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
+               }
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               UN_EXTS();
+               return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+
+       case SLJIT_NEG:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               UN_EXTS();
+               return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if (flags & ALT_FORM1) /* ALT_FORM1 selects the word form (CNTLZW) instead of CNTLZD */
+                       return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+               return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
+
+       case SLJIT_ADD:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       BIN_IMM_EXTS();
+                       return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM4) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
+                       return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
+               BIN_EXTS();
+               return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_ADDC:
+               if (flags & ALT_FORM1) { /* ALT_FORM1: the ADDE is bracketed by MFXER ... MTXER on operand 0 */
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               BIN_EXTS();
+               return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_SUB:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & (ALT_FORM2 | ALT_FORM3)) { /* compare-with-immediate only; the low bit of the CRD() operand is set unless ALT_SIGN_EXT is given */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2)
+                               FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
+                       if (flags & ALT_FORM3)
+                               return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
+                       return SLJIT_SUCCESS;
+               }
+               if (flags & (ALT_FORM4 | ALT_FORM5)) { /* register compare only: no result is written to dst */
+                       if (flags & ALT_FORM4)
+                               FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
+                       if (flags & ALT_FORM5)
+                               return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
+                       return SLJIT_SUCCESS;
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+               BIN_EXTS();
+               if (flags & ALT_FORM6)
+                       FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
+               return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_SUBC:
+               if (flags & ALT_FORM1) { /* ALT_FORM1: the SUBFE is bracketed by MFXER ... MTXER on operand 0 */
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               BIN_EXTS();
+               return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_MUL:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
+               }
+               BIN_EXTS();
+               if (flags & ALT_FORM2) /* ALT_FORM2 selects the word form (MULLW) instead of MULLD */
+                       return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
+               return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_AND:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm);
+               }
+               return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_OR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) { /* two-instruction form: low halfword of compiler->imm, then compiler->imm >> 16 */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_XOR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) { /* two-instruction form: low halfword of compiler->imm, then compiler->imm >> 16 */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_SHL:
+               if (flags & ALT_FORM1) { /* immediate shift count; ALT_FORM2 selects the 32-bit word form */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2) {
+                               compiler->imm &= 0x1f;
+                               return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
+                       }
+                       else {
+                               compiler->imm &= 0x3f;
+                               return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
+                       }
+               }
+               return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_LSHR:
+               if (flags & ALT_FORM1) { /* immediate shift count; ALT_FORM2 selects the 32-bit word form */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2) {
+                               compiler->imm &= 0x1f;
+                               return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
+                       }
+                       else {
+                               compiler->imm &= 0x3f;
+                               return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
+                       }
+               }
+               return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_ASHR:
+               if (flags & ALT_FORM3) /* ALT_FORM3: MFXER here, matching MTXER after the shift */
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2) {
+                               compiler->imm &= 0x1f;
+                               FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
+                       }
+                       else {
+                               compiler->imm &= 0x3f;
+                               FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)));
+                       }
+               }
+               else
+                       FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)));
+               return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+/* Emits the fixed five-instruction load of the 64-bit init_value into reg:
+   ADDIS and ORI for the two upper halfwords, PUSH_RLDICR(reg, 31), then ORIS
+   and ORI for the two lower halfwords. Every instruction is always emitted,
+   even when a halfword is zero. */
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value)
+{
+       const sljit_sw bits_63_48 = init_value >> 48;
+       const sljit_sw bits_47_32 = init_value >> 32;
+       const sljit_sw bits_31_16 = init_value >> 16;
+
+       /* Upper word. */
+       FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(bits_63_48)));
+       FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(bits_47_32)));
+
+       FAIL_IF(PUSH_RLDICR(reg, 31));
+
+       /* Lower word. */
+       FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(bits_31_16)));
+       return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
+}
+
+/* Rewrites the low 16 bits of instruction words 0, 1, 3 and 4 at addr with
+   the four halfwords of new_addr (most significant first), then flushes the
+   five-word range. Word 2 is left untouched. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *patch = (sljit_ins*)addr;
+       sljit_uw half3 = (new_addr >> 48) & 0xffff;
+       sljit_uw half2 = (new_addr >> 32) & 0xffff;
+       sljit_uw half1 = (new_addr >> 16) & 0xffff;
+       sljit_uw half0 = new_addr & 0xffff;
+
+       /* Keep the top 16 bits of each instruction word, swap in the new immediate. */
+       patch[0] = (patch[0] & 0xffff0000) | half3;
+       patch[1] = (patch[1] & 0xffff0000) | half2;
+       patch[3] = (patch[3] & 0xffff0000) | half1;
+       patch[4] = (patch[4] & 0xffff0000) | half0;
+       SLJIT_CACHE_FLUSH(patch, patch + 5);
+}
+
+/* Rewrites the low 16 bits of instruction words 0, 1, 3 and 4 at addr with
+   the four halfwords of new_constant (most significant first), then flushes
+   the five-word range. Word 2 is left untouched. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *patch = (sljit_ins*)addr;
+       sljit_sw half3 = (new_constant >> 48) & 0xffff;
+       sljit_sw half2 = (new_constant >> 32) & 0xffff;
+       sljit_sw half1 = (new_constant >> 16) & 0xffff;
+       sljit_sw half0 = new_constant & 0xffff;
+
+       /* Keep the top 16 bits of each instruction word, swap in the new immediate. */
+       patch[0] = (patch[0] & 0xffff0000) | half3;
+       patch[1] = (patch[1] & 0xffff0000) | half2;
+       patch[3] = (patch[3] & 0xffff0000) | half1;
+       patch[4] = (patch[4] & 0xffff0000) | half0;
+       SLJIT_CACHE_FLUSH(patch, patch + 5);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c b/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c
new file mode 100644 (file)
index 0000000..5e06f2f
--- /dev/null
@@ -0,0 +1,2188 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Returns the name of the target this backend generates code for: the
+   literal "PowerPC" with the SLJIT_CPUINFO string literal appended by
+   compile-time string concatenation. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       SLJIT_CONST char *platform_name = "PowerPC" SLJIT_CPUINFO;
+
+       return platform_name;
+}
+
+/* Length of an instruction word.
+   Both for ppc-32 and ppc-64. */
+typedef sljit_ui sljit_ins;
+
+/* SLJIT_PPC_STACK_FRAME_V2 is set for 32-bit builds on AIX and for every
+   64-bit build. sljit_emit_enter/sljit_emit_return use it to pick the slot
+   in which the link register is saved (2 words above the stack pointer
+   instead of 1) and to reserve a larger fixed part of the frame. */
+#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_PPC_STACK_FRAME_V2 1
+#endif
+
+/* Needed for _sync_cache_range(), which ppc_cache_flush calls on AIX. */
+#ifdef _AIX
+#include <sys/cache.h>
+#endif
+
+/* Little-endian builds get a dedicated TMP_CALL_REG (see below) and make
+   detect_jump_type keep the absolute target address for calls. */
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
+#endif
+
+/* Makes the instruction words in [from, to) visible to instruction fetch.
+   The implementation is selected at compile time:
+     - AIX: a single _sync_cache_range() call covering the byte range;
+     - GCC-style inline assembly, POWER (_ARCH_PWR/_ARCH_PWR2): clf + dcs
+       for every instruction word, followed by one ics;
+     - GCC-style inline assembly, PowerPC: dcbf + sync + icbi for every
+       instruction word, followed by one isync;
+     - anything else is rejected with #error.
+   The loops advance by one sljit_ins at a time, so each word of the range
+   is named by a flush instruction. */
+static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
+{
+#ifdef _AIX
+       _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
+#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
+#      if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
+       /* Cache flush for POWER architecture. */
+       while (from < to) {
+               __asm__ volatile (
+                       "clf 0, %0\n"
+                       "dcs\n"
+                       : : "r"(from)
+               );
+               from++;
+       }
+       __asm__ volatile ( "ics" );
+#      elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
+#      error "Cache flush is not implemented for PowerPC/POWER common mode."
+#      else
+       /* Cache flush for PowerPC architecture. */
+       while (from < to) {
+               __asm__ volatile (
+                       "dcbf 0, %0\n"
+                       "sync\n"
+                       "icbi 0, %0\n"
+                       : : "r"(from)
+               );
+               from++;
+       }
+       __asm__ volatile ( "isync" );
+#      endif
+#      ifdef __xlc__
+#      warning "This file may fail to compile if -qfuncsect is used"
+#      endif
+#elif defined(__xlc__)
+#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
+#else
+#error "This platform requires a cache flush implementation."
+#endif /* _AIX */
+}
+
+/* Backend-private registers, numbered right after the public sljit
+   registers. TMP_ZERO is loaded with 0 by sljit_emit_enter. */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
+#define TMP_ZERO       (SLJIT_NO_REGISTERS + 4)
+
+/* Register used for call targets: a dedicated one when the entry address
+   has to be passed to the callee, otherwise an alias of TMP_REG2. */
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+#define TMP_CALL_REG   (SLJIT_NO_REGISTERS + 5)
+#else
+#define TMP_CALL_REG   TMP_REG2
+#endif
+
+/* Temporary floating point registers; FD/FA/FB/FC use these numbers
+   directly, without going through reg_map. */
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_FLOAT_REG6 + 1)
+
+/* Maps an sljit register index to the machine register number placed in
+   the instruction fields by D/S/A/B/C. Index 0 maps to r0.
+   NOTE(review): the initializer lists 17 values, which fills the array
+   exactly only if SLJIT_NO_REGISTERS is 11 - confirm against sljitLir.h. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
+       0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31, 12
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+/* Field builders. D/S/A/B/C take an sljit register index and translate it
+   through reg_map before shifting it into place; D and S produce the same
+   bit field (shift by 21). */
+#define D(d)           (reg_map[d] << 21)
+#define S(s)           (reg_map[s] << 21)
+#define A(a)           (reg_map[a] << 16)
+#define B(b)           (reg_map[b] << 11)
+#define C(c)           (reg_map[c] << 6)
+/* Floating point variants: the register number is used as given. */
+#define FD(fd)         ((fd) << 21)
+#define FA(fa)         ((fa) << 16)
+#define FB(fb)         ((fb) << 11)
+#define FC(fc)         ((fc) << 6)
+/* Low 16 bits of a value, for 16-bit immediate fields. */
+#define IMM(imm)       ((imm) & 0xffff)
+/* Raw (unmapped) value placed in the same position as D(). */
+#define CRD(d)         ((d) << 21)
+
+/* Instruction bit sections.
+   OE and Rc flag (see ALT_SET_FLAGS): ALT_SET_FLAGS itself lands on the
+   OE bit, and shifted right by 10 it becomes the Rc bit (bit 0). */
+#define OERC(flags)    ((((flags) & ALT_SET_FLAGS) >> 10) | ((flags) & ALT_SET_FLAGS))
+/* Rc flag (see ALT_SET_FLAGS). */
+#define RC(flags)      (((flags) & ALT_SET_FLAGS) >> 10)
+/* Primary opcode (top 6 bits) and extended opcode (starting at bit 1).
+   The operand is converted to sljit_ins before shifting: primary opcodes
+   of 32 and above (e.g. HI(36), HI(58), HI(62), HI(63)) do not fit in a
+   signed int once shifted left by 26, and overflowing a signed left shift
+   is undefined behaviour in C. */
+#define HI(opcode)     ((sljit_ins)(opcode) << 26)
+#define LO(opcode)     ((sljit_ins)(opcode) << 1)
+
+/* Opcode templates. Each value holds only the primary opcode (HI) and,
+   where present, the extended opcode (LO) plus any fixed field bits;
+   register and immediate fields are OR-ed in at the point of use. */
+#define ADD            (HI(31) | LO(266))
+#define ADDC           (HI(31) | LO(10))
+#define ADDE           (HI(31) | LO(138))
+#define ADDI           (HI(14))
+#define ADDIC          (HI(13))
+#define ADDIS          (HI(15))
+#define ADDME          (HI(31) | LO(234))
+#define AND            (HI(31) | LO(28))
+#define ANDI           (HI(28))
+#define ANDIS          (HI(29))
+#define Bx             (HI(18))
+#define BCx            (HI(16))
+#define BCCTR          (HI(19) | LO(528) | (3 << 11))
+#define BLR            (HI(19) | LO(16) | (0x14 << 21))
+#define CNTLZD         (HI(31) | LO(58))
+#define CNTLZW         (HI(31) | LO(26))
+#define CMP            (HI(31) | LO(0))
+#define CMPI           (HI(11))
+#define CMPL           (HI(31) | LO(32))
+#define CMPLI          (HI(10))
+#define CROR           (HI(19) | LO(449))
+#define DIVD           (HI(31) | LO(489))
+#define DIVDU          (HI(31) | LO(457))
+#define DIVW           (HI(31) | LO(491))
+#define DIVWU          (HI(31) | LO(459))
+#define EXTSB          (HI(31) | LO(954))
+#define EXTSH          (HI(31) | LO(922))
+#define EXTSW          (HI(31) | LO(986))
+#define FABS           (HI(63) | LO(264))
+#define FADD           (HI(63) | LO(21))
+#define FADDS          (HI(59) | LO(21))
+#define FCMPU          (HI(63) | LO(0))
+#define FDIV           (HI(63) | LO(18))
+#define FDIVS          (HI(59) | LO(18))
+#define FMR            (HI(63) | LO(72))
+#define FMUL           (HI(63) | LO(25))
+#define FMULS          (HI(59) | LO(25))
+#define FNEG           (HI(63) | LO(40))
+#define FSUB           (HI(63) | LO(20))
+#define FSUBS          (HI(59) | LO(20))
+#define LD             (HI(58) | 0)
+#define LWZ            (HI(32))
+#define MFCR           (HI(31) | LO(19))
+#define MFLR           (HI(31) | LO(339) | 0x80000)
+#define MFXER          (HI(31) | LO(339) | 0x10000)
+#define MTCTR          (HI(31) | LO(467) | 0x90000)
+#define MTLR           (HI(31) | LO(467) | 0x80000)
+#define MTXER          (HI(31) | LO(467) | 0x10000)
+#define MULHD          (HI(31) | LO(73))
+#define MULHDU         (HI(31) | LO(9))
+#define MULHW          (HI(31) | LO(75))
+#define MULHWU         (HI(31) | LO(11))
+#define MULLD          (HI(31) | LO(233))
+#define MULLI          (HI(7))
+#define MULLW          (HI(31) | LO(235))
+#define NEG            (HI(31) | LO(104))
+/* NOP shares its encoding with ORI when all fields are zero. */
+#define NOP            (HI(24))
+#define NOR            (HI(31) | LO(124))
+#define OR             (HI(31) | LO(444))
+#define ORI            (HI(24))
+#define ORIS           (HI(25))
+#define RLDICL         (HI(30))
+#define RLWINM         (HI(21))
+#define SLD            (HI(31) | LO(27))
+#define SLW            (HI(31) | LO(24))
+#define SRAD           (HI(31) | LO(794))
+/* The extended opcode is shifted one bit further than usual, leaving bit 1
+   free; the 64-bit shift emitter ORs the sixth shift-amount bit there. */
+#define SRADI          (HI(31) | LO(413 << 1))
+#define SRAW           (HI(31) | LO(792))
+#define SRAWI          (HI(31) | LO(824))
+#define SRD            (HI(31) | LO(539))
+#define SRW            (HI(31) | LO(536))
+#define STD            (HI(62) | 0)
+#define STDU           (HI(62) | 1)
+#define STDUX          (HI(31) | LO(181))
+#define STW            (HI(36))
+#define STWU           (HI(37))
+#define STWUX          (HI(31) | LO(183))
+#define SUBF           (HI(31) | LO(40))
+#define SUBFC          (HI(31) | LO(8))
+#define SUBFE          (HI(31) | LO(136))
+#define SUBFIC         (HI(8))
+#define XOR            (HI(31) | LO(316))
+#define XORI           (HI(26))
+#define XORIS          (HI(27))
+
+/* Limits of the 16-bit signed and unsigned immediate fields. */
+#define SIMM_MAX       (0x7fff)
+#define SIMM_MIN       (-0x8000)
+#define UIMM_MAX       (0xffff)
+
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+/* Fills *context so that it can stand in for a function pointer on targets
+   that call through a context structure.
+     func_ptr - when non-NULL, receives the address of context;
+     context  - structure to fill (addr, r2 and r11 members);
+     addr     - entry address to store; when 0, the first word read
+                through func is used instead;
+     func     - read as an array of three machine words: entry address,
+                r2 value and r11 value (presumably the ABI function
+                descriptor of an existing function - confirm against the
+                platform ABI). */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
+{
+       sljit_sw* descriptor = (sljit_sw*)func;
+
+       if (func_ptr)
+               *func_ptr = (void*)context;
+
+       /* The template's entry word is only read when no address is given. */
+       if (addr)
+               context->addr = addr;
+       else
+               context->addr = descriptor[0];
+       context->r2 = descriptor[1];
+       context->r11 = descriptor[2];
+}
+#endif
+
+/* Appends one instruction word to the compiler's code buffer and bumps
+   compiler->size, which counts instruction words. Returns SLJIT_SUCCESS;
+   when ensure_buf yields no storage, FAIL_IF is expected to return the
+   compiler's error code instead. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *slot;
+
+       slot = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!slot);
+
+       slot[0] = ins;
+       compiler->size += 1;
+       return SLJIT_SUCCESS;
+}
+
+/* Decides whether the jump whose branch instruction is about to be placed
+   at code_ptr can use a shorter encoding than the generic full-address
+   sequence, and records the choice in jump->flags.
+     jump     - jump being resolved (JUMP_ADDR or JUMP_LABEL target);
+     code_ptr - position of the branch instruction in the output;
+     code     - start of the output, used to turn a label's word offset
+                into an address.
+   Returns 1 when one of the PATCH_* flags was set (the caller then
+   compacts the emitted sequence), 0 when the full sequence must stay. */
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_sw extra_jump_flags;
+
+       /* Rewritable jumps always keep the full sequence; on 32-bit builds
+          that pass the entry address, so do calls. */
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
+               return 0;
+#else
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return 0;
+#endif
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+
+       /* 64-bit calls that pass the entry address skip the relative branch
+          forms and only try the shorter absolute address loads. */
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       if (jump->flags & IS_CALL)
+               goto keep_address;
+#endif
+
+       /* Byte distance to the target with the two low bits cleared. */
+       diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
+
+       extra_jump_flags = 0;
+       if (jump->flags & IS_COND) {
+               /* Conditional branches have a 16-bit displacement field. */
+               if (diff <= 0x7fff && diff >= -0x8000) {
+                       jump->flags |= PATCH_B;
+                       return 1;
+               }
+               if (target_addr <= 0xffff) {
+                       jump->flags |= PATCH_B | PATCH_ABS_B;
+                       return 1;
+               }
+               /* Out of conditional range: the caller emits an extra
+                  instruction in front of an unconditional branch, so the
+                  branch itself ends up one instruction further on. */
+               extra_jump_flags = REMOVE_COND;
+
+               diff -= sizeof(sljit_ins);
+       }
+
+       /* Unconditional branches have a 26-bit displacement field. */
+       if (diff <= 0x01ffffff && diff >= -0x02000000) {
+               jump->flags |= PATCH_B | extra_jump_flags;
+               return 1;
+       }
+       if (target_addr <= 0x03ffffff) {
+               jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
+               return 1;
+       }
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+keep_address:
+#endif
+       /* 64-bit only: targets that fit in 31 or 47 bits can be loaded with
+          fewer instructions than a full 64-bit address. */
+       if (target_addr <= 0x7fffffff) {
+               jump->flags |= PATCH_ABS32;
+               return 1;
+       }
+       if (target_addr <= 0x7fffffffffffl) {
+               jump->flags |= PATCH_ABS48;
+               return 1;
+       }
+#endif
+
+       return 0;
+}
+
+/* Turns the instruction words collected in the compiler's buffers into
+   executable code.
+   Pass 1 copies the words into memory obtained from SLJIT_MALLOC_EXEC,
+   records the final address of every label and constant, and shrinks the
+   instruction sequence of each jump for which detect_jump_type finds a
+   shorter form. Pass 2 walks the jump list again and writes the now known
+   target addresses into the branch or immediate-load instructions.
+   Finally the generated range is passed to SLJIT_CACHE_FLUSH.
+   Returns the start of the code, or, when SLJIT_INDIRECT_CALL is enabled,
+   a pointer to a function context placed right after the code. Allocation
+   failure is handled by PTR_FAIL_WITH_EXEC_IF (defined elsewhere). */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       /* Reserve room behind the code for the function context; on 64-bit
+          one extra word is added when the word count is odd. */
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
+#else
+       compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
+#endif
+#endif
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       /* Pass 1: word_count is the index in the buffered stream, code_ptr
+          the (possibly smaller) position in the output. */
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               label->addr = (sljit_uw)code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+                               /* jump->addr now points at the first word of the
+                                  sequence that ends with this branch: 3 words
+                                  back on 32-bit, 6 words back on 64-bit. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                               jump->addr = (sljit_uw)(code_ptr - 3);
+#else
+                               jump->addr = (sljit_uw)(code_ptr - 6);
+#endif
+                               if (detect_jump_type(jump, code_ptr, code)) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                                       /* Move the branch over the start of the
+                                          sequence and drop the rest. */
+                                       code_ptr[-3] = code_ptr[0];
+                                       code_ptr -= 3;
+#else
+                                       if (jump->flags & PATCH_ABS32) {
+                                               /* Keep the first two words and move
+                                                  the last two up behind them. */
+                                               code_ptr -= 3;
+                                               code_ptr[-1] = code_ptr[2];
+                                               code_ptr[0] = code_ptr[3];
+                                       }
+                                       else if (jump->flags & PATCH_ABS48) {
+                                               /* Drop one word and adjust the shift
+                                                  and the following OR instruction. */
+                                               code_ptr--;
+                                               code_ptr[-1] = code_ptr[0];
+                                               code_ptr[0] = code_ptr[1];
+                                               /* rldicr rX,rX,32,31 -> rX,rX,16,47 */
+                                               SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
+                                               code_ptr[-3] ^= 0x8422;
+                                               /* oris -> ori */
+                                               code_ptr[-2] ^= 0x4000000;
+                                       }
+                                       else {
+                                               /* Plain branch: it replaces the whole
+                                                  sequence. */
+                                               code_ptr[-6] = code_ptr[0];
+                                               code_ptr -= 6;
+                                       }
+#endif
+                                       if (jump->flags & REMOVE_COND) {
+                                               /* Rewrite the conditional branch as a
+                                                  conditional branch with one condition
+                                                  bit flipped and a fixed displacement
+                                                  of 8 bytes, followed by an
+                                                  unconditional branch that pass 2
+                                                  patches. */
+                                               code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
+                                               code_ptr++;
+                                               jump->addr += sizeof(sljit_ins);
+                                               code_ptr[0] = Bx;
+                                               jump->flags -= IS_COND;
+                                       }
+                               }
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr ++;
+                       word_count ++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       /* A label may sit right behind the last instruction. */
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
+#else
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+#endif
+
+       /* Pass 2: every label address is known now, write the targets. */
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+                       if (jump->flags & PATCH_B) {
+                               /* Single branch instruction; 0x2 marks an absolute
+                                  target, and the bits kept from the old word are
+                                  preserved by the masks below. */
+                               if (jump->flags & IS_COND) {
+                                       if (!(jump->flags & PATCH_ABS_B)) {
+                                               addr = addr - jump->addr;
+                                               SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
+                                               *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
+                                       }
+                                       else {
+                                               SLJIT_ASSERT(addr <= 0xffff);
+                                               *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
+                                       }
+                               }
+                               else {
+                                       if (!(jump->flags & PATCH_ABS_B)) {
+                                               addr = addr - jump->addr;
+                                               SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
+                                               *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
+                                       }
+                                       else {
+                                               SLJIT_ASSERT(addr <= 0x03ffffff);
+                                               *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
+                                       }
+                               }
+                               break;
+                       }
+                       /* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                       buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+#else
+                       if (jump->flags & PATCH_ABS32) {
+                               SLJIT_ASSERT(addr <= 0x7fffffff);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+                               break;
+                       }
+                       if (jump->flags & PATCH_ABS48) {
+                               SLJIT_ASSERT(addr <= 0x7fffffffffff);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
+                               break;
+                       }
+                       /* Full 64-bit load: word 2 carries no immediate. */
+                       buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                       buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                       buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
+#endif
+               } while (0);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       /* Step over one word when code_ptr is not 8-byte aligned, then build
+          the function context there. */
+       if (((sljit_sw)code_ptr) & 0x4)
+               code_ptr++;
+       sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
+       return code_ptr;
+#else
+       sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
+       return code_ptr;
+#endif
+#else
+       return code;
+#endif
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* inp_flags: */
+
+/* Creates an index in data_transfer_insts array: bit 0 selects load,
+   bit 1 the indexed form, bit 2 write-back, bits 3-4 the data size and
+   bit 5 signedness. */
+#define LOAD_DATA      0x01
+#define INDEXED                0x02
+#define WRITE_BACK     0x04
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x08
+#define HALF_DATA      0x10
+#define INT_DATA       0x18
+#define SIGNED_DATA    0x20
+/* Separates integer and floating point registers: masked flag values up to
+   GPR_REG address a general purpose register, larger ones (DOUBLE_DATA
+   set) a floating point register. */
+#define GPR_REG                0x3f
+#define DOUBLE_DATA    0x40
+
+/* All bits that take part in the table index. */
+#define MEM_MASK       0x7f
+
+/* Other inp_flags. */
+
+#define ARG_TEST       0x000100
+/* Integer operation and set flags -> requires exts on 64 bit systems. */
+#define ALT_SIGN_EXT   0x000200
+/* This flag affects the RC() and OERC() macros. */
+#define ALT_SET_FLAGS  0x000400
+#define ALT_KEEP_CACHE 0x000800
+#define ALT_FORM1      0x010000
+#define ALT_FORM2      0x020000
+#define ALT_FORM3      0x040000
+#define ALT_FORM4      0x080000
+#define ALT_FORM5      0x100000
+#define ALT_FORM6      0x200000
+
+/* Source and destination is register. */
+#define REG_DEST       0x000001
+#define REG1_SOURCE    0x000002
+#define REG2_SOURCE    0x000004
+/* getput_arg_fast returned true. */
+#define FAST_DEST      0x000008
+/* Multiple instructions are required. */
+#define SLOW_DEST      0x000010
+/* The following flags, defined above, share the same flag word:
+ALT_SIGN_EXT           0x000200
+ALT_SET_FLAGS          0x000400
+ALT_FORM1              0x010000
+...
+ALT_FORM6              0x200000 */
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#include "sljitNativePPC_32.c"
+#else
+#include "sljitNativePPC_64.c"
+#endif
+
+/* Word-sized store/load opcodes used by sljit_emit_enter and
+   sljit_emit_return to save and restore registers on the stack:
+   stw/lwz on 32-bit, std/ld on 64-bit. Undefined again after
+   sljit_emit_return. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define STACK_STORE    STW
+#define STACK_LOAD     LWZ
+#else
+#define STACK_STORE    STD
+#define STACK_LOAD     LD
+#endif
+
+/* Emits the function prologue.
+     args       - number of incoming arguments (up to 3 are handled) that
+                  are copied from scratch registers to saved registers;
+     scratches  - number of scratch registers, recorded in the compiler;
+     saveds     - number of saved registers to preserve (up to 5);
+     local_size - bytes of local storage requested by the caller.
+   The registers are stored at negative offsets from the incoming stack
+   pointer before the frame is allocated; the frame itself is created last
+   by a store-with-update of the stack pointer. compiler->local_size is set
+   to the total frame size, rounded up to a multiple of 16.
+   Returns SLJIT_SUCCESS; FAIL_IF is expected to return early when an
+   instruction cannot be emitted. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* Copy the link register into r0 (reg_map[0]); it is stored below. */
+       FAIL_IF(push_inst(compiler, MFLR | D(0)));
+       /* Save TMP_ZERO and the requested saved registers in consecutive
+          word slots just below the incoming stack pointer. */
+       FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));
+       if (saveds >= 1)
+               FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (saveds >= 2)
+               FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (saveds >= 3)
+               FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (saveds >= 4)
+               FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (saveds >= 5)
+               FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
+       /* Store the saved link register value above the stack pointer; the
+          slot depends on the stack frame layout in use. */
+#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
+       FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) ));
+#else
+       FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) ));
+#endif
+
+       /* Load 0 into TMP_ZERO (addi with a zero base and zero immediate). */
+       FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
+       /* Copy the incoming arguments from scratch to saved registers
+          (OR of a register with itself acts as a move). */
+       if (args >= 1)
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1)));
+       if (args >= 2)
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2)));
+       if (args >= 3)
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3)));
+
+       /* Frame size: TMP_ZERO slot + saved registers + fixed area required
+          by the frame layout + locals, rounded up to 16 bytes. */
+#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
+       compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
+#else
+       compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
+#endif
+       compiler->local_size = (compiler->local_size + 15) & ~0xf;
+
+       /* Allocate the frame with a store-with-update of the stack pointer;
+          sizes beyond the 16-bit immediate go through r0 and the indexed
+          form. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       if (compiler->local_size <= SIMM_MAX)
+               FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
+       else {
+               FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
+               FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
+       }
+#else
+       if (compiler->local_size <= SIMM_MAX)
+               FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
+       else {
+               FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
+               FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
+       }
+#endif
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
+       compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
+#else
+       compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
+#endif
+       compiler->local_size = (compiler->local_size + 15) & ~0xf;
+}
+
+/* Emits the function epilogue, mirroring sljit_emit_enter: the return
+   value is moved into place by emit_mov_before_return, the frame is
+   released by adding compiler->local_size back to the stack pointer, the
+   link register value, the saved registers and TMP_ZERO are reloaded from
+   the slots the prologue used, and the function returns with blr.
+     op, src, srcw - describe the value to return; passed on unchanged to
+                     emit_mov_before_return.
+   Returns SLJIT_SUCCESS; FAIL_IF is expected to return early when an
+   instruction cannot be emitted. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       /* Release the frame; sizes beyond the 16-bit immediate are loaded
+          into r0 first. */
+       if (compiler->local_size <= SIMM_MAX)
+               FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size)));
+       else {
+               FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
+               FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
+       }
+
+       /* Reload the saved link register value into r0. */
+#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
+       FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw))));
+#else
+       FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw))));
+#endif
+       /* Restore the saved registers in the reverse order of the prologue. */
+       if (compiler->saveds >= 5)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (compiler->saveds >= 4)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (compiler->saveds >= 3)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (compiler->saveds >= 2)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
+       if (compiler->saveds >= 1)
+               FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
+       FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));
+
+       /* Put the return address back into the link register and return. */
+       FAIL_IF(push_inst(compiler, MTLR | S(0)));
+       FAIL_IF(push_inst(compiler, BLR));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef STACK_STORE
+#undef STACK_LOAD
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* i/x - immediate/indexed form (1 bit)
+   n/w - no write-back / write-back (1 bit)
+   s/l - store/load (1 bit)
+   u/s - unsigned/signed (1 bit)
+   w/b/h/i - word/byte/half/int allowed (2 bits)
+   Each signedness half contains 32 items (64 in total), but not all are
+   different. */
+
+/* Marker bits stored inside data_transfer_insts entries. They are not part
+   of the instruction encoding: the 64-bit INST_CODE_AND_DST masks them out
+   before the opcode is used. */
+/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
+#define INT_ALIGNED    0x10000
+/* 64-bit only: there is no lwau instruction. */
+#define UPDATE_REQ     0x20000
+
+/* ARCH_32_64(a, b) selects a on 32-bit builds and b on 64-bit builds.
+   INST_CODE_AND_DST(inst, flags, reg) combines an opcode with its
+   destination register field: a general purpose register (D) when the
+   masked flags do not exceed GPR_REG, a floating point register (FD)
+   otherwise. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define ARCH_32_64(a, b)       a
+#define INST_CODE_AND_DST(inst, flags, reg) \
+       ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
+#else
+#define ARCH_32_64(a, b)       b
+#define INST_CODE_AND_DST(inst, flags, reg) \
+       (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
+#endif
+
+static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = {
+/* Opcode table for loads and stores: 32 unsigned and 32 signed integer
+   entries followed by 8 floating point entries. It is indexed with
+   (flags & MEM_MASK) by the functions that follow.
+   Every entry is tagged with a five letter key; judging by the mnemonics
+   next to the entries the letters mean:
+     1st: u = unsigned, s = signed (d / s = double / single in the
+          floating point group)
+     2nd: w = word, b = byte, h = half, i = int
+     3rd: n = normal, w = write back (update form)
+     4th: i = immediate offset, x = indexed (register offset)
+     5th: s = store, l = load
+   On PPC-64 some entries also carry the INT_ALIGNED / UPDATE_REQ marker
+   bits, which INST_CODE_AND_DST removes before the instruction is
+   emitted. */
+/* -------- Unsigned -------- */
+
+/* Word. */
+
+/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
+/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
+/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+
+/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
+/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
+/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+
+/* Byte. */
+
+/* u b n i s */ HI(38) /* stb */, 
+/* u b n i l */ HI(34) /* lbz */,
+/* u b n x s */ HI(31) | LO(215) /* stbx */,
+/* u b n x l */ HI(31) | LO(87) /* lbzx */,
+
+/* u b w i s */ HI(39) /* stbu */,
+/* u b w i l */ HI(35) /* lbzu */,
+/* u b w x s */ HI(31) | LO(247) /* stbux */,
+/* u b w x l */ HI(31) | LO(119) /* lbzux */,
+
+/* Half. */
+
+/* u h n i s */ HI(44) /* sth */,
+/* u h n i l */ HI(40) /* lhz */,
+/* u h n x s */ HI(31) | LO(407) /* sthx */,
+/* u h n x l */ HI(31) | LO(279) /* lhzx */,
+
+/* u h w i s */ HI(45) /* sthu */,
+/* u h w i l */ HI(41) /* lhzu */,
+/* u h w x s */ HI(31) | LO(439) /* sthux */,
+/* u h w x l */ HI(31) | LO(311) /* lhzux */,
+
+/* Int. */
+
+/* u i n i s */ HI(36) /* stw */,
+/* u i n i l */ HI(32) /* lwz */,
+/* u i n x s */ HI(31) | LO(151) /* stwx */,
+/* u i n x l */ HI(31) | LO(23) /* lwzx */,
+
+/* u i w i s */ HI(37) /* stwu */,
+/* u i w i l */ HI(33) /* lwzu */,
+/* u i w x s */ HI(31) | LO(183) /* stwux */,
+/* u i w x l */ HI(31) | LO(55) /* lwzux */,
+
+/* -------- Signed -------- */
+
+/* Word. */
+
+/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
+/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
+/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+
+/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
+/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
+/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+
+/* Byte. */
+
+/* s b n i s */ HI(38) /* stb */,
+/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
+/* s b n x s */ HI(31) | LO(215) /* stbx */,
+/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
+
+/* s b w i s */ HI(39) /* stbu */,
+/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
+/* s b w x s */ HI(31) | LO(247) /* stbux */,
+/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
+
+/* Half. */
+
+/* s h n i s */ HI(44) /* sth */,
+/* s h n i l */ HI(42) /* lha */,
+/* s h n x s */ HI(31) | LO(407) /* sthx */,
+/* s h n x l */ HI(31) | LO(343) /* lhax */,
+
+/* s h w i s */ HI(45) /* sthu */,
+/* s h w i l */ HI(43) /* lhau */,
+/* s h w x s */ HI(31) | LO(439) /* sthux */,
+/* s h w x l */ HI(31) | LO(375) /* lhaux */,
+
+/* Int. */
+
+/* s i n i s */ HI(36) /* stw */,
+/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
+/* s i n x s */ HI(31) | LO(151) /* stwx */,
+/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
+
+/* s i w i s */ HI(37) /* stwu */,
+/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
+/* s i w x s */ HI(31) | LO(183) /* stwux */,
+/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
+
+/* -------- Floating point: double (d) and single (s) -------- */
+
+/* d   n i s */ HI(54) /* stfd */,
+/* d   n i l */ HI(50) /* lfd */,
+/* d   n x s */ HI(31) | LO(727) /* stfdx */,
+/* d   n x l */ HI(31) | LO(599) /* lfdx */,
+
+/* s   n i s */ HI(52) /* stfs */,
+/* s   n i l */ HI(48) /* lfs */,
+/* s   n x s */ HI(31) | LO(663) /* stfsx */,
+/* s   n x l */ HI(31) | LO(535) /* lfsx */,
+
+};
+
+#undef ARCH_32_64
+
+/* Simple cases, (no caching is required).
+   Tries to access the memory operand (arg, argw) with a single load or
+   store taken from data_transfer_insts, using reg as the data register.
+   Return value:
+      0  the operand cannot be handled here; nothing was emitted
+      1  ARG_TEST is set in inp_flags and the operand can be handled;
+         nothing was emitted
+     -1  the instruction was pushed
+   A failing push_inst leaves early through FAIL_IF (callers in this file
+   read compiler->error after a non-zero return). */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_ins inst;
+
+       /* Should work when (arg & REG_MASK) == 0. */
+       SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (arg & OFFS_REG_MASK) {
+               if (argw & 0x3)
+                       return 0; /* shifted offset register: left to getput_arg */
+               if (inp_flags & ARG_TEST)
+                       return 1;
+
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
+               return -1;
+       }
+
+       if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
+               inp_flags &= ~WRITE_BACK; /* no base register: write back is dropped */
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       inst = data_transfer_insts[inp_flags & MEM_MASK];
+       SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
+       /* Give up when the offset is outside SIMM_MIN..SIMM_MAX, when an
+          INT_ALIGNED entry gets an offset that is not a multiple of 4, or
+          when the entry is marked UPDATE_REQ. */
+       if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
+               return 0;
+       if (inp_flags & ARG_TEST)
+               return 1;
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       if (argw > SIMM_MAX || argw < SIMM_MIN)
+               return 0;
+       if (inp_flags & ARG_TEST)
+               return 1;
+
+       inst = data_transfer_insts[inp_flags & MEM_MASK];
+       SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+#endif
+
+       FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
+       return -1;
+}
+
+/* Companion of getput_arg (defined below): tells whether an intermediate
+   address value produced for (arg, argw) can be shared with the access to
+   (next_arg, next_argw). Returns 1 when it can, 0 otherwise.
+   Note: can_cache is only called for binary operators, and those always
+   use word arguments without write back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       sljit_sw delta;
+#endif
+       sljit_sw upper, next_upper;
+
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       if (arg & OFFS_REG_MASK) {
+               /* Register offset: needs the same offset register and the same shift. */
+               if ((arg & OFFS_REG_MASK) != (next_arg & OFFS_REG_MASK))
+                       return 0;
+               return (argw & 0x3) == (next_argw & 0x3);
+       }
+
+       /* An immediate offset never shares with a register offset. */
+       if (next_arg & OFFS_REG_MASK)
+               return 0;
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       /* Shareable when both offsets have the same adjusted upper half. */
+       upper = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
+       next_upper = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
+       return upper == next_upper;
+#else
+       if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
+               upper = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
+               next_upper = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
+               if (upper == next_upper)
+                       return 1;
+       }
+
+       /* Otherwise the two offsets must lie within SIMM_MIN..SIMM_MAX of
+          each other, and the base must be absent or identical. */
+       delta = argw - next_argw;
+       if (delta > SIMM_MAX || delta < SIMM_MIN)
+               return 0;
+
+       return !(arg & REG_MASK) || arg == next_arg;
+#endif
+}
+/* PPC-64 only. ADJUST_CACHED_IMM(imm): when the selected instruction (the
+   local variable inst) is marked INT_ALIGNED and imm is not a multiple of
+   4, the low two bits of imm are added to TMP_REG3 with an ADDI, added to
+   compiler->cache_argw, and then cleared from imm.
+   Expects compiler and inst to be in scope and may leave the enclosing
+   function through FAIL_IF.
+   NOTE(review): expands to a bare if statement and evaluates imm several
+   times, so it should only be given a plain variable. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define ADJUST_CACHED_IMM(imm) \
+       if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
+               /* Adjust cached value. Fortunately this is really a rare case */ \
+               compiler->cache_argw += imm & 0x3; \
+               FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
+               imm &= ~0x3; \
+       }
+#endif
+
+/* Emit the necessary instructions. See can_cache above.
+   General counterpart of getput_arg_fast for memory operands that need
+   more than one instruction. (arg, argw) is the operand accessed through
+   reg; (next_arg, next_argw) describes the operand accessed next, and
+   when the two can share an intermediate value it is left in TMP_REG3 and
+   recorded in compiler->cache_arg / compiler->cache_argw.
+   Returns the result of the final push_inst, or leaves early through
+   FAIL_IF when an earlier instruction cannot be pushed. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_r;
+       sljit_ins inst;
+       sljit_sw high_short, next_high_short;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       sljit_sw diff;
+#endif
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       /* Scratch register for address arithmetic: the destination itself
+          when loading into a general purpose register, TMP_REG1 otherwise. */
+       tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
+       /* Special case for "mov reg, [reg, ... ]". */
+       if ((arg & REG_MASK) == tmp_r)
+               tmp_r = TMP_REG1;
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3; /* only the shift amount of the offset register is used */
+               /* Otherwise getput_arg_fast would capture it. */
+               SLJIT_ASSERT(argw);
+
+               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
+                       tmp_r = TMP_REG3;
+               else {
+                       if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
+                               compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                               compiler->cache_argw = argw;
+                               tmp_r = TMP_REG3;
+                       }
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                       FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
+#else
+                       FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
+#endif
+               }
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
+       }
+
+       if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
+               inp_flags &= ~WRITE_BACK;
+
+       inst = data_transfer_insts[inp_flags & MEM_MASK];
+       SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       if (argw <= 0x7fff7fffl && argw >= -0x80000000l
+                       && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
+#endif
+               /* Two instruction form: an ADDIS supplies high_short (the upper
+                  half of argw, bumped by 0x10000 when bit 15 of argw is set)
+                  and the low 16 bits are passed with IMM(argw). */
+               arg &= REG_MASK;
+               high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
+               /* The getput_arg_fast should handle this otherwise. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
+#else
+               SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
+#endif
+
+               if (inp_flags & WRITE_BACK) {
+                       if (arg == reg) {
+                               FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
+                               reg = tmp_r;
+                       }
+                       tmp_r = arg;
+                       FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
+               }
+               else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) {
+                       if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
+                               next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
+                               if (high_short == next_high_short) {
+                                       compiler->cache_arg = SLJIT_IMM | arg;
+                                       compiler->cache_argw = next_high_short;
+                                       tmp_r = TMP_REG3;
+                               }
+                       }
+                       FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
+               }
+               else
+                       tmp_r = TMP_REG3;
+
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       }
+
+       /* Everything else is PPC-64 only. */
+       if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
+               diff = argw - compiler->cache_argw;
+               if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       ADJUST_CACHED_IMM(diff);
+                       return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
+               }
+
+               diff = argw - next_argw;
+               if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       SLJIT_ASSERT(inp_flags & LOAD_DATA);
+
+                       compiler->cache_arg = SLJIT_IMM;
+                       compiler->cache_argw = argw;
+                       tmp_r = TMP_REG3;
+               }
+
+               FAIL_IF(load_immediate(compiler, tmp_r, argw));
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
+       }
+
+       diff = argw - compiler->cache_argw;
+       if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
+               ADJUST_CACHED_IMM(diff);
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
+       }
+
+       if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               if (compiler->cache_argw != argw) {
+                       FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
+                       compiler->cache_argw = argw;
+               }
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
+       }
+
+       if (argw == next_argw && (next_arg & SLJIT_MEM)) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
+       }
+
+       diff = argw - next_argw;
+       if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
+
+               compiler->cache_arg = arg;
+               compiler->cache_argw = argw;
+
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
+       }
+
+       if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+               tmp_r = TMP_REG3;
+       }
+       else
+               FAIL_IF(load_immediate(compiler, tmp_r, argw));
+
+       /* Get the indexed version instead of the normal one. */
+       inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+       SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+       return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
+#endif
+}
+
+/* Accesses the memory operand (arg1, arg1w) through reg. The single
+   instruction form is tried first; when it does not apply, getput_arg is
+   used with (arg2, arg2w) passed as the operand that follows. */
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+
+       /* The fast path took the operand: report the compiler state. */
+       return compiler->error;
+}
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* Common driver for the one and two operand operations: brings src1
+          and src2 into registers (loading immediates and memory operands),
+          calls emit_single_op, and stores the result when dst is a memory
+          operand. Returns SLJIT_SUCCESS, or leaves early through FAIL_IF.
+          arg1 goes to TMP_REG1 or src reg
+          arg2 goes to TMP_REG2, imm or src reg
+          TMP_REG3 can be used for caching
+          result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+       sljit_si dst_r;
+       sljit_si src1_r;
+       sljit_si src2_r;
+       sljit_si sugg_src2_r = TMP_REG2;
+       sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
+       /* Forget the cached address unless the caller asked to keep it. */
+       if (!(input_flags & ALT_KEEP_CACHE)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       /* Destination check. */
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+                       return SLJIT_SUCCESS; /* move into nothing from a non-memory source: no code */
+               dst_r = TMP_REG2;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r; /* moves load straight into the destination */
+       }
+       else {
+               SLJIT_ASSERT(dst & SLJIT_MEM);
+               if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
+                       flags |= FAST_DEST;
+                       dst_r = TMP_REG2;
+               }
+               else {
+                       flags |= SLOW_DEST;
+                       dst_r = 0;
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1)) {
+               src1_r = src1;
+               flags |= REG1_SOURCE;
+       }
+       else if (src1 & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+               src1_r = TMP_REG1;
+       }
+       else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
+               FAIL_IF(compiler->error);
+               src1_r = TMP_REG1;
+       }
+       else
+               src1_r = 0;
+
+       /* Source 2. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       dst_r = src2_r;
+       }
+       else if (src2 & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
+               src2_r = sugg_src2_r;
+       }
+       else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
+               FAIL_IF(compiler->error);
+               src2_r = sugg_src2_r;
+       }
+       else
+               src2_r = 0;
+
+       /* src1_r, src2_r and dst_r can be zero (=unprocessed).
+          All arguments are complex addressing modes, and it is a binary operator.
+          can_cache picks the load order: when src1 cannot share with src2
+          but can share with dst, src2 is loaded first so that the src1
+          access is immediately followed by the dst access. */
+       if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+               }
+               src1_r = TMP_REG1;
+               src2_r = TMP_REG2;
+       }
+       else if (src1_r == 0 && src2_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+               src1_r = TMP_REG1;
+       }
+       else if (src1_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               src1_r = TMP_REG1;
+       }
+       else if (src2_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+               src2_r = sugg_src2_r;
+       }
+
+       if (dst_r == 0)
+               dst_r = TMP_REG2;
+
+       if (src1_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
+               src1_r = TMP_REG1;
+       }
+
+       if (src2_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
+               src2_r = sugg_src2_r;
+       }
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+       /* Memory destination: store the result held in dst_r. */
+       if (flags & (FAST_DEST | SLOW_DEST)) {
+               if (flags & FAST_DEST)
+                       FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
+               else
+                       FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Emits an operation that takes no explicit operands: breakpoint / nop,
+   and the multiply and divide forms that work on SLJIT_SCRATCH_REG1 and
+   SLJIT_SCRATCH_REG2. Any other opcode emits nothing and returns
+   SLJIT_SUCCESS. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       sljit_si int_op = op & SLJIT_INT_OP;
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       op = GET_OPCODE(op);
+
+       if (op == SLJIT_BREAKPOINT || op == SLJIT_NOP)
+               return push_inst(compiler, NOP);
+
+       if (op == SLJIT_UMUL || op == SLJIT_SMUL) {
+               /* Copy the first operand to TMP_REG1, then put the low half of
+                  the product into SCRATCH_REG1 and the high half into
+                  SCRATCH_REG2. */
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
+               if (op == SLJIT_UMUL)
+                       return push_inst(compiler, MULHDU | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
+               return push_inst(compiler, MULHD | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
+#else
+               FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
+               if (op == SLJIT_UMUL)
+                       return push_inst(compiler, MULHWU | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
+               return push_inst(compiler, MULHW | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
+#endif
+       }
+
+       if (op == SLJIT_UDIV || op == SLJIT_SDIV) {
+               /* Copy the dividend to TMP_REG1; the quotient goes to
+                  SCRATCH_REG1 and SCRATCH_REG2 receives
+                  dividend - quotient * divisor. */
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (int_op) {
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
+                       FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
+               }
+               else {
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
+                       FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
+               }
+#else
+               FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
+               FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
+#endif
+               return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1));
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+#define EMIT_MOV(type, type_flags, type_cast) \
+       emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
+/* sljit_emit_op1: emits a single source operation (the move family, NOT,
+   NEG and CLZ) by translating op into an emit_op call; src is passed in
+   the src2 position with TMP_REG1 given as src1. An immediate zero source
+   is replaced by TMP_ZERO. Opcodes not listed in the switch emit nothing
+   and return SLJIT_SUCCESS.
+   The EMIT_MOV helper defined just before this function uses a plain
+   SLJIT_MOV when src is an immediate, converting srcw with type_cast. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       op = GET_OPCODE(op);
+       if ((src & SLJIT_IMM) && srcw == 0)
+               src = TMP_ZERO;
+       /* Overflow flag requested: MTXER with TMP_ZERO is emitted first
+          (presumably to clear XER - confirm against the ISA). */
+       if (op_flags & SLJIT_SET_O)
+               FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
+
+       if (op_flags & SLJIT_INT_OP) {
+               if (op < SLJIT_NOT) {
+                       if (FAST_IS_REG(src) && src == dst) {
+                               if (!TYPE_CAST_NEEDED(op))
+                                       return SLJIT_SUCCESS; /* register moved onto itself, no cast: nothing to emit */
+                       }
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+                       if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
+                               op = SLJIT_MOV_UI; /* int op from memory: the unsigned form is used */
+                       if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
+                               op = SLJIT_MOVU_UI;
+                       if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOV_SI; /* int op from an immediate: the signed form is used */
+                       if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOVU_SI;
+#endif
+               }
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               else {
+                       /* Most operations expect sign extended arguments. */
+                       flags |= INT_DATA | SIGNED_DATA;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_si)srcw;
+               }
+#endif
+       }
+
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+#endif
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       case SLJIT_MOV_UI:
+               return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui));
+
+       case SLJIT_MOV_SI:
+               return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si));
+#endif
+
+       case SLJIT_MOV_UB:
+               return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub));
+
+       case SLJIT_MOV_SB:
+               return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb));
+
+       case SLJIT_MOV_UH:
+               return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh));
+
+       case SLJIT_MOV_SH:
+               return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh));
+
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       case SLJIT_MOVU_UI:
+       case SLJIT_MOVU_SI:
+#endif
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       case SLJIT_MOVU_UI:
+               return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui));
+
+       case SLJIT_MOVU_SI:
+               return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si));
+#endif
+
+       case SLJIT_MOVU_UB:
+               return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub));
+
+       case SLJIT_MOVU_SB:
+               return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb));
+
+       case SLJIT_MOVU_UH:
+               return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh));
+
+       case SLJIT_MOVU_SH:
+               return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh));
+
+       case SLJIT_NOT:
+               return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+               return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_CLZ:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+#endif
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+#undef EMIT_MOV
+
+/* Immediate classification helpers used when an instruction form is
+   selected. Each macro is non-zero only when src has SLJIT_IMM set and
+   srcw satisfies the range or bit pattern described next to it. */
+
+/* srcw lies within SIMM_MIN..SIMM_MAX. */
+#define TEST_SL_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
+
+/* Only the low 16 bits of srcw may be set. */
+#define TEST_UL_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
+
+/* The low 16 bits of srcw are zero; on PPC-64 the value must also fit
+   into the signed 32 bit range. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_SH_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
+#else
+#define TEST_SH_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & 0xffff))
+#endif
+
+/* Only bits 16..31 of srcw may be set.
+   The mask is built at sljit_sw width on purpose: a plain ~0xffff0000 is
+   an unsigned int expression that zero extends to 0xffff on PPC-64, which
+   would let values with bits 32..63 set pass the test. */
+#define TEST_UH_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))
+
+/* Value accepted by the ALT_FORM4 add in sljit_emit_op2; on PPC-64 it is
+   limited to -0x80000000..0x7fff7fff, on PPC-32 any immediate qualifies. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_ADD_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
+#else
+#define TEST_ADD_IMM(src, srcw) \
+       ((src) & SLJIT_IMM)
+#endif
+
+/* srcw fits into an unsigned 32 bit value; on PPC-32 any immediate
+   qualifies. As above the mask needs sljit_sw width: a plain ~0xffffffff
+   is the unsigned int 0, which would make the PPC-64 test accept every
+   immediate. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_UI_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffffffff))
+#else
+#define TEST_UI_IMM(src, srcw) \
+       ((src) & SLJIT_IMM)
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       if ((src1 & SLJIT_IMM) && src1w == 0)
+               src1 = TMP_ZERO;
+       if ((src2 & SLJIT_IMM) && src2w == 0)
+               src2 = TMP_ZERO;
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       if (op & SLJIT_INT_OP) {
+               /* Most operations expect sign extended arguments. */
+               flags |= INT_DATA | SIGNED_DATA;
+               if (src1 & SLJIT_IMM)
+                       src1w = (sljit_si)(src1w);
+               if (src2 & SLJIT_IMM)
+                       src2w = (sljit_si)(src2w);
+               if (GET_FLAGS(op))
+                       flags |= ALT_SIGN_EXT;
+       }
+#endif
+       if (op & SLJIT_SET_O)
+               FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
+       if (src2 == TMP_REG2)
+               flags |= ALT_KEEP_CACHE;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+               if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+                       if (TEST_SL_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       if (TEST_SH_IMM(src2, src2w)) {
+                               compiler->imm = (src2w >> 16) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SH_IMM(src1, src1w)) {
+                               compiler->imm = (src1w >> 16) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       /* Range between -1 and -32768 is covered above. */
+                       if (TEST_ADD_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffffffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_ADD_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffffffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
+                       if (TEST_SL_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_ADDC:
+               return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB:
+               if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+                       if (TEST_SL_IMM(src2, -src2w)) {
+                               compiler->imm = (-src2w) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       if (TEST_SH_IMM(src2, -src2w)) {
+                               compiler->imm = ((-src2w) >> 16) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       /* Range between -1 and -32768 is covered above. */
+                       if (TEST_ADD_IMM(src2, -src2w)) {
+                               compiler->imm = -src2w & 0xffffffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+               }
+               if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
+                       if (!(op & SLJIT_SET_U)) {
+                               /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
+                               if (TEST_SL_IMM(src2, src2w)) {
+                                       compiler->imm = src2w & 0xffff;
+                                       return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                               }
+                               if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
+                                       compiler->imm = src1w & 0xffff;
+                                       return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                               }
+                       }
+                       if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
+                               /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
+                               if (TEST_UL_IMM(src2, src2w)) {
+                                       compiler->imm = src2w & 0xffff;
+                                       return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                               }
+                               return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
+                       }
+                       if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
+                               compiler->imm = src2w;
+                               return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
+               }
+               if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
+                       if (TEST_SL_IMM(src2, -src2w)) {
+                               compiler->imm = (-src2w) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+               }
+               /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
+               return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUBC:
+               return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_MUL:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (op & SLJIT_INT_OP)
+                       flags |= ALT_FORM2;
+#endif
+               if (!GET_FLAGS(op)) {
+                       if (TEST_SL_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffff;
+                               return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               /* Commutative unsigned operations. */
+               if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
+                       if (TEST_UL_IMM(src2, src2w)) {
+                               compiler->imm = src2w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_UL_IMM(src1, src1w)) {
+                               compiler->imm = src1w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       if (TEST_UH_IMM(src2, src2w)) {
+                               compiler->imm = (src2w >> 16) & 0xffff;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_UH_IMM(src1, src1w)) {
+                               compiler->imm = (src1w >> 16) & 0xffff;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
+                       if (TEST_UI_IMM(src2, src2w)) {
+                               compiler->imm = src2w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_UI_IMM(src1, src1w)) {
+                               compiler->imm = src1w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_ASHR:
+               if (op & SLJIT_KEEP_FLAGS)
+                       flags |= ALT_FORM3;
+               /* Fall through. */
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (op & SLJIT_INT_OP)
+                       flags |= ALT_FORM2;
+#endif
+               if (src2 & SLJIT_IMM) {
+                       compiler->imm = src2w;
+                       return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+               }
+               return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
+/* Looks up the given SLJIT register in reg_map[] and returns that entry
+ * (presumably the machine register number; reg_map is defined outside this
+ * excerpt). The argument is first handed to the matching check_ helper. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       check_sljit_get_register_index(reg);
+       return reg_map[reg];
+}
+/* Returns the index of a floating point register. After the check_ helper
+ * runs, the register number is returned unchanged (no mapping table is
+ * consulted here). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       check_sljit_get_float_register_index(reg);
+       return reg;
+}
+/* Appends one caller-supplied instruction word to the instruction stream.
+ * instruction: points to the raw instruction; it is read as one sljit_ins.
+ * size: must be 4 (asserted).
+ * Returns the result of push_inst(). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op_custom(compiler, instruction, size);
+       SLJIT_ASSERT(size == 4);
+
+       return push_inst(compiler, *(sljit_ins*)instruction); /* NOTE(review): assumes the buffer is suitably aligned for sljit_ins - confirm with callers. */
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+/* Reports whether floating point instructions may be emitted. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+       /* A build-time SLJIT_IS_FPU_AVAILABLE override wins; without one the
+          FPU is reported as present. */
+#if !defined(SLJIT_IS_FPU_AVAILABLE)
+       return 1;
+#else
+       return SLJIT_IS_FPU_AVAILABLE;
+#endif
+}
+/* FLOAT_DATA: DOUBLE_DATA combined with the SLJIT_SINGLE_OP bit of op shifted
+ * right by six. SELECT_FOP: yields `single` when op has SLJIT_SINGLE_OP set,
+ * otherwise `double`. */
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6))
+#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
+/* Emits a one-operand floating point operation: SLJIT_CMPD, SLJIT_MOVD,
+ * SLJIT_NEGD or SLJIT_ABSD.
+ * op: opcode, optionally combined with SLJIT_SINGLE_OP.
+ * dst, dstw: destination operand (the first compared value for SLJIT_CMPD).
+ * src, srcw: source operand.
+ * Returns SLJIT_SUCCESS, or leaves early through CHECK_ERROR / FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_fr;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); /* FLOAT_DATA() shifts 0x100 down to 0x4, which must not collide with DOUBLE_DATA. */
+
+       compiler->cache_arg = 0; /* Reset the cached argument state before any memory access is emitted. */
+       compiler->cache_argw = 0;
+
+       if (GET_OPCODE(op) == SLJIT_CMPD) { /* Compare: dst and src are both inputs, nothing is stored. */
+               if (dst & SLJIT_MEM) {
+                       FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
+                       dst = TMP_FREG1;
+               }
+
+               if (src & SLJIT_MEM) {
+                       FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
+                       src = TMP_FREG2;
+               }
+
+               return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src)); /* NOTE(review): CRD(4) presumably selects the CR field whose bits (4 + n) the SLJIT_C_FLOAT_* conditions test - confirm against the CRD macro. */
+       }
+
+       dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; /* Work in TMP_FREG1 when dst is not a register. */
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
+               src = dst_fr;
+       }
+
+       switch (GET_OPCODE(op)) {
+               case SLJIT_MOVD:
+                       if (src != dst_fr && dst_fr != TMP_FREG1) /* Only a register-to-register move needs FMR; the load above or the store below covers the rest. */
+                               FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src)));
+                       break;
+               case SLJIT_NEGD:
+                       FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src)));
+                       break;
+               case SLJIT_ABSD:
+                       FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src)));
+                       break;
+       }
+
+       if (dst_fr == TMP_FREG1) { /* The result is in the temporary: store it to dst. */
+               if (GET_OPCODE(op) == SLJIT_MOVD)
+                       dst_fr = src; /* A move never copied into TMP_FREG1, so store src itself. */
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
+       }
+
+       return SLJIT_SUCCESS;
+}
+/* Emits a two-operand floating point operation: SLJIT_ADDD, SLJIT_SUBD,
+ * SLJIT_MULD or SLJIT_DIVD.
+ * op: opcode, optionally combined with SLJIT_SINGLE_OP (selects the
+ *     single precision instruction through SELECT_FOP).
+ * dst, dstw: destination operand.
+ * src1, src1w / src2, src2w: source operands; memory sources are loaded
+ *     into TMP_FREG1 / TMP_FREG2 first.
+ * Returns SLJIT_SUCCESS, or leaves early through CHECK_ERROR / FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_fr, flags = 0; /* flags: ALT_FORM1 / ALT_FORM2 mark src1 / src2 as still needing a getput_arg() load. */
+
+       CHECK_ERROR();
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+       compiler->cache_arg = 0; /* Reset the cached argument state before any memory access is emitted. */
+       compiler->cache_argw = 0;
+
+       dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2; /* Compute into TMP_FREG2 when dst is not a register. */
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { /* Non-zero: the fast path handled this load (or failed; see the error check). */
+                       FAIL_IF(compiler->error);
+                       src1 = TMP_FREG1;
+               } else
+                       flags |= ALT_FORM1;
+       }
+
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+                       FAIL_IF(compiler->error);
+                       src2 = TMP_FREG2;
+               } else
+                       flags |= ALT_FORM2;
+       }
+
+       if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) { /* Both sources still pending: pick the load order from the can_cache() results. */
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & ALT_FORM1)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+       else if (flags & ALT_FORM2)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+       if (flags & ALT_FORM1) /* Operands loaded by getput_arg() now live in the temporaries. */
+               src1 = TMP_FREG1;
+       if (flags & ALT_FORM2)
+               src2 = TMP_FREG2;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADDD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2)));
+               break;
+
+       case SLJIT_SUBD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2)));
+               break;
+
+       case SLJIT_MULD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
+               break;
+
+       case SLJIT_DIVD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2)));
+               break;
+       }
+
+       if (dst_fr == TMP_FREG2) /* The result is in the temporary: store it to dst. */
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef FLOAT_DATA
+#undef SELECT_FOP
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+/* Emits code that copies the link register (read with MFLR) into dst.
+ * dst, dstw: destination operand; nothing is emitted for SLJIT_UNUSED.
+ * Returns SLJIT_SUCCESS or the result of the last emit call. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, MFLR | D(dst)); /* Register destination: a single MFLR is enough. */
+
+       /* Memory. */
+       FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
+       return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); /* Store TMP_REG2 to dst through the generic move path. */
+}
+/* Emits code that moves src into the link register (MTLR) and then
+ * branches through it (BLR).
+ * src, srcw: return address operand; memory and immediate sources are
+ *     first brought into TMP_REG2.
+ * Returns the result of the final push_inst(). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, MTLR | S(src)));
+       else {
+               if (src & SLJIT_MEM)
+                       FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); /* Load the address from memory into TMP_REG2. */
+               else if (src & SLJIT_IMM)
+                       FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+               FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
+       }
+       return push_inst(compiler, BLR);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+/* Creates a label at the current position of the instruction stream.
+ * If the most recent label already has the same size value as the compiler
+ * (no instruction was added since), that label is returned instead of a new
+ * one. Returns NULL-style failure through CHECK_ERROR_PTR / PTR_FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label); /* Allocation from the compiler's buffer failed. */
+       set_label(label, compiler);
+       return label;
+}
+
+/* Maps an SLJIT condition / jump type to the BO (bits 21 and up) and BI
+ * (bits 16 and up) operand fields of a conditional branch instruction.
+ * Integer equality, signed and overflow conditions test CR bits 0-3;
+ * unsigned and floating point conditions test CR bits 4-7. */
+static sljit_ins get_bo_bi_flags(sljit_si type)
+{
+       /* BO operand: 12 takes the branch when the selected CR bit is set,
+          4 when it is clear, and 20 branches unconditionally. */
+       const sljit_ins bo_if_set = 12 << 21;
+       const sljit_ins bo_if_clear = 4 << 21;
+       const sljit_ins bo_always = 20 << 21;
+       sljit_ins bo;
+       sljit_ins bi;
+
+       switch (type) {
+       case SLJIT_C_EQUAL:
+               bo = bo_if_set;
+               bi = 2;
+               break;
+
+       case SLJIT_C_NOT_EQUAL:
+               bo = bo_if_clear;
+               bi = 2;
+               break;
+
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               bo = bo_if_set;
+               bi = 4 + 0;
+               break;
+
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               bo = bo_if_clear;
+               bi = 4 + 0;
+               break;
+
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               bo = bo_if_set;
+               bi = 4 + 1;
+               break;
+
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               bo = bo_if_clear;
+               bi = 4 + 1;
+               break;
+
+       case SLJIT_C_SIG_LESS:
+               bo = bo_if_set;
+               bi = 0;
+               break;
+
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               bo = bo_if_clear;
+               bi = 0;
+               break;
+
+       case SLJIT_C_SIG_GREATER:
+               bo = bo_if_set;
+               bi = 1;
+               break;
+
+       case SLJIT_C_SIG_LESS_EQUAL:
+               bo = bo_if_clear;
+               bi = 1;
+               break;
+
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_MUL_OVERFLOW:
+               bo = bo_if_set;
+               bi = 3;
+               break;
+
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               bo = bo_if_clear;
+               bi = 3;
+               break;
+
+       case SLJIT_C_FLOAT_EQUAL:
+               bo = bo_if_set;
+               bi = 4 + 2;
+               break;
+
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               bo = bo_if_clear;
+               bi = 4 + 2;
+               break;
+
+       case SLJIT_C_FLOAT_UNORDERED:
+               bo = bo_if_set;
+               bi = 4 + 3;
+               break;
+
+       case SLJIT_C_FLOAT_ORDERED:
+               bo = bo_if_clear;
+               bi = 4 + 3;
+               break;
+
+       default:
+               /* Everything else must be a plain jump or call type. */
+               SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
+               return bo_always;
+       }
+
+       return bo | (bi << 16);
+}
+/* Emits a (possibly conditional) jump or call whose target is filled in
+ * later, and returns the sljit_jump record describing it.
+ * type: SLJIT condition / jump / call type in the low 8 bits, optionally
+ *     combined with SLJIT_REWRITABLE_JUMP.
+ * The emitted sequence is: constant load into TMP_CALL_REG (emit_const),
+ * MTCTR, then BCCTR with the BO/BI fields from get_bo_bi_flags().
+ * Returns NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins bo_bi_flags;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       bo_bi_flags = get_bo_bi_flags(type & 0xff);
+       if (!bo_bi_flags) /* Defensive: every value returned by get_bo_bi_flags() is non-zero. */
+               return NULL;
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff; /* Only the base type matters from here on. */
+
+       /* In PPC, we don't need to touch the arguments. */
+       if (type < SLJIT_JUMP)
+               jump->flags |= IS_COND;
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+       if (type >= SLJIT_CALL0)
+               jump->flags |= IS_CALL;
+#endif
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); /* Placeholder target address of 0. */
+       PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
+       jump->addr = compiler->size; /* Records the position of the branch instruction pushed next. */
+       PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); /* Low bit set for SLJIT_FAST_CALL and above (the LK link bit in the PowerPC encoding). */
+       return jump;
+}
+/* Emits an unconditional indirect jump or call to the address given by src.
+ * type: jump / call type; SLJIT_FAST_CALL and above set the low bit of the
+ *     branch instruction.
+ * src, srcw: target operand (register, immediate or memory).
+ * The target is moved to CTR (MTCTR) and reached with BCCTR using BO = 20.
+ * Returns the result of the final push_inst(), or leaves early through
+ * CHECK_ERROR / FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump = NULL; /* Only allocated for immediate targets. */
+       sljit_si src_r;
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src)) {
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+               if (type >= SLJIT_CALL0) {
+                       FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); /* OR of src with itself: copies src into TMP_CALL_REG. */
+                       src_r = TMP_CALL_REG;
+               }
+               else
+                       src_r = src;
+#else
+               src_r = src;
+#endif
+       } else if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR);
+               jump->u.target = srcw; /* The immediate is recorded as the jump target. */
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+               if (type >= SLJIT_CALL0)
+                       jump->flags |= IS_CALL;
+#endif
+               FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); /* Placeholder constant of 0. */
+               src_r = TMP_CALL_REG;
+       }
+       else {
+               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); /* Memory operand: load the target into TMP_CALL_REG. */
+               src_r = TMP_CALL_REG;
+       }
+
+       FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
+       if (jump)
+               jump->addr = compiler->size; /* Position of the branch instruction pushed next. */
+       return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
+}
+
+/* Get a bit from CR, all other bits are zeroed. */
+#define GET_CR_BIT(bit, dst) \
+       FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
+       FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
+
+#define INVERT_BIT(dst) \
+       FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
+/* Materializes a condition as a single bit taken from CR and either moves it
+ * to dst (op below SLJIT_ADD) or combines it with src through
+ * sljit_emit_op2() (op at or above SLJIT_ADD).
+ * op: opcode plus flag bits; the flag bits are forwarded to sljit_emit_op2().
+ * dst, dstw: destination operand; nothing is emitted for SLJIT_UNUSED.
+ * src, srcw: first operand of the combining operation (unused for moves).
+ * type: SLJIT_C_* condition selecting the CR bit and whether it is inverted.
+ * Returns SLJIT_SUCCESS or the result of the last emit call. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si reg, input_flags;
+       sljit_si flags = GET_ALL_FLAGS(op);
+       sljit_sw original_dstw = dstw; /* Saved before ADJUST_LOCAL_OFFSET; this is the value handed to sljit_emit_op2() below. */
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; /* Build the bit directly in dst only for a move into a register. */
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { /* Load a memory src into TMP_REG1 up front. */
+               ADJUST_LOCAL_OFFSET(src, srcw);
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA;
+#else
+               input_flags = WORD_DATA;
+#endif
+               FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       switch (type) { /* CR bit numbers match those used by get_bo_bi_flags(); the negated conditions add INVERT_BIT. */
+       case SLJIT_C_EQUAL:
+               GET_CR_BIT(2, reg);
+               break;
+
+       case SLJIT_C_NOT_EQUAL:
+               GET_CR_BIT(2, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               GET_CR_BIT(4 + 0, reg);
+               break;
+
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               GET_CR_BIT(4 + 0, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               GET_CR_BIT(4 + 1, reg);
+               break;
+
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               GET_CR_BIT(4 + 1, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_C_SIG_LESS:
+               GET_CR_BIT(0, reg);
+               break;
+
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               GET_CR_BIT(0, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_C_SIG_GREATER:
+               GET_CR_BIT(1, reg);
+               break;
+
+       case SLJIT_C_SIG_LESS_EQUAL:
+               GET_CR_BIT(1, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_MUL_OVERFLOW:
+               GET_CR_BIT(3, reg);
+               break;
+
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               GET_CR_BIT(3, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_C_FLOAT_EQUAL:
+               GET_CR_BIT(4 + 2, reg);
+               break;
+
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               GET_CR_BIT(4 + 2, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_C_FLOAT_UNORDERED:
+               GET_CR_BIT(4 + 3, reg);
+               break;
+
+       case SLJIT_C_FLOAT_ORDERED:
+               GET_CR_BIT(4 + 3, reg);
+               INVERT_BIT(reg);
+               break;
+
+       default:
+               SLJIT_ASSERT_STOP();
+               break;
+       }
+
+       if (op < SLJIT_ADD) { /* Plain move of the bit into dst. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (op == SLJIT_MOV)
+                       input_flags = WORD_DATA;
+               else {
+                       op = SLJIT_MOV_UI;
+                       input_flags = INT_DATA;
+               }
+#else
+               op = SLJIT_MOV;
+               input_flags = WORD_DATA;
+#endif
+               if (reg != TMP_REG2) /* The bit was produced directly in the dst register. */
+                       return SLJIT_SUCCESS;
+               return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+       }
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->skip_checks = 1; /* NOTE(review): presumably makes the nested sljit_emit_op2() skip its argument checks - confirm. */
+#endif
+       return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
+}
+/* Emits a constant load of init_value into dst and returns the sljit_const
+ * record registered for it with set_const().
+ * dst, dstw: destination operand; the value is built in dst when
+ *     SLOW_IS_REG(dst) holds, otherwise in TMP_REG2 and then stored for
+ *     memory destinations.
+ * Returns NULL-style failure through CHECK_ERROR_PTR / PTR_FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); /* Store the value built in TMP_REG2. */
+       return const_;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeSPARC_32.c b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_32.c
new file mode 100644 (file)
index 0000000..80479bf
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Loads the constant imm into register dst.
+ * Values within [SIMM_MIN, SIMM_MAX] take a single OR with an immediate;
+ * anything else is built with SETHI (bits 10 and up) followed, only when the
+ * low ten bits are non-zero, by an OR of those bits. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw imm)
+{
+       sljit_sw low_bits = imm & 0x3ff;
+
+       if (imm >= SIMM_MIN && imm <= SIMM_MAX)
+               return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst));
+
+       FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst)));
+
+       /* SETHI alone is enough when nothing remains in the low part. */
+       if (!low_bits)
+               return SLJIT_SUCCESS;
+
+       return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | low_bits, DR(dst));
+}
+/* Encodes src2 with IMM() when SRC2_IMM is set in flags, otherwise as the S2() register field. */
+#define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2))
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same);
+
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_UB)
+                               return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst));
+                       FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst)));
+                       return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst)));
+                       return push_inst(compiler, (op == SLJIT_MOV_SH ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */
+               FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS));
+               FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS)));
+               FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst)));
+
+               /* Loop. */
+               FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
+               FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS));
+               return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS));
+
+       case SLJIT_ADD:
+               return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_ADDC:
+               return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_SUB:
+               return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_SUBC:
+               return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_MUL:
+               FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               if (!(flags & SET_FLAGS))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4)));
+               return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS);
+
+       case SLJIT_AND:
+               return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_OR:
+               return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_XOR:
+               return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_SHL:
+               FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+       case SLJIT_LSHR:
+               FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+       case SLJIT_ASHR:
+               FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a sethi / or instruction pair which loads init_value into dst.
+   Both instructions are pushed unconditionally. */
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
+{
+       /* Bits 10 and above become the 22 bit sethi operand, the low 10 bits are or-ed in. */
+       sljit_ins load_high = SETHI | D(dst) | ((init_value >> 10) & 0x3fffff);
+       sljit_ins load_low = OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff);
+
+       FAIL_IF(push_inst(compiler, load_high, DR(dst)));
+       return push_inst(compiler, load_low, DR(dst));
+}
+
+/* Rewrites the immediate fields of the two instruction words at addr so
+   that they hold new_addr: bits 10-31 go into the low 22 bits of the first
+   word, bits 0-9 into the low 10 bits of the second one. Both words are
+   flushed afterwards. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *pair = (sljit_ins*)addr;
+       sljit_ins high22 = (new_addr >> 10) & 0x3fffff;
+       sljit_ins low10 = new_addr & 0x3ff;
+
+       pair[0] = (pair[0] & 0xffc00000) | high22;
+       pair[1] = (pair[1] & 0xfffffc00) | low10;
+       SLJIT_CACHE_FLUSH(pair, pair + 2);
+}
+
+/* Rewrites the immediate fields of the two instruction words at addr so
+   that they hold new_constant: bits 10-31 go into the low 22 bits of the
+   first word, bits 0-9 into the low 10 bits of the second one. Both words
+   are flushed afterwards. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *pair = (sljit_ins*)addr;
+       sljit_ins high22 = (new_constant >> 10) & 0x3fffff;
+       sljit_ins low10 = new_constant & 0x3ff;
+
+       pair[0] = (pair[0] & 0xffc00000) | high22;
+       pair[1] = (pair[1] & 0xfffffc00) | low10;
+       SLJIT_CACHE_FLUSH(pair, pair + 2);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c
new file mode 100644 (file)
index 0000000..d6a1e12
--- /dev/null
@@ -0,0 +1,1383 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Returns the platform name: the literal "SPARC" with the SLJIT_CPUINFO
+   string appended at compile time. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "SPARC" SLJIT_CPUINFO;
+}
+
+/* Type of a single instruction word.
+   The same type is used for both sparc-32 and sparc-64. */
+typedef sljit_ui sljit_ins;
+
+/* Issues the SPARC flush instruction over the instruction words in the
+   range [from, to). Returns immediately when the range is empty. */
+static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
+{
+#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590
+       /* Hand written variant for older Sun compilers. It reads the two
+          arguments directly from %i0 (from) and %i1 (to). */
+       __asm (
+               /* if (from == to) return */
+               "cmp %i0, %i1\n"
+               "be .leave\n"
+               "nop\n"
+
+               /* loop until from >= to */
+               ".mainloop:\n"
+               "flush %i0\n"
+               "add %i0, 8, %i0\n"
+               "cmp %i0, %i1\n"
+               "bcs .mainloop\n"
+               "nop\n"
+
+               /* The comparison was done above. */
+               "bne .leave\n"
+               /* nop is not necessary here, since the
+                  sub operation has no side effect. */
+               "sub %i0, 4, %i0\n"
+               "flush %i0\n"
+               ".leave:"
+       );
+#else
+       if (SLJIT_UNLIKELY(from == to))
+               return;
+
+       do {
+               __asm__ volatile (
+                       "flush %0\n"
+                       : : "r"(from)
+               );
+               /* Operates at least on doubleword. */
+               from += 2;
+       } while (from < to);
+
+       /* The loop stepped exactly onto 'to': the word just below it gets
+          one more explicit flush. */
+       if (from == to) {
+               /* Flush the last word. */
+               from --;
+               __asm__ volatile (
+                       "flush %0\n"
+                       : : "r"(from)
+               );
+       }
+#endif
+}
+
+/* Temporary registers: sljit register indexes which follow the public
+   registers (they start right after SLJIT_NO_REGISTERS).
+   TMP_REG2 is not used by getput_arg */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
+#define TMP_REG4       (SLJIT_NO_REGISTERS + 4)
+#define TMP_LINK       (SLJIT_NO_REGISTERS + 5)
+
+/* Temporary floating point registers. */
+#define TMP_FREG1      (0)
+#define TMP_FREG2      ((SLJIT_FLOAT_REG6 + 1) << 1)
+
+/* Translates an sljit register index into the register number which is
+   placed into the instruction fields (see the D, S1, S2 and DR macros). */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
+       0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* Operand field builders. D, S1 and S2 take an sljit register index and
+   translate it through reg_map; the variants with an 'A' suffix (DA, S1A,
+   S2A) take the value to be encoded directly. */
+#define D(d)           (reg_map[d] << 25)
+#define DA(d)          ((d) << 25)
+#define S1(s1)         (reg_map[s1] << 14)
+#define S2(s2)         (reg_map[s2])
+#define S1A(s1)                ((s1) << 14)
+#define S2A(s2)                (s2)
+/* Bit 13 is set when the second operand is an immediate. */
+#define IMM_ARG                0x2000
+#define DOP(op)                ((op) << 5)
+/* 13 bit immediate operand, with IMM_ARG already included. */
+#define IMM(imm)       (((imm) & 0x1fff) | IMM_ARG)
+
+/* Mapped register number, in the form passed as the delay_slot argument of push_inst. */
+#define DR(dr)         (reg_map[dr])
+/* Opcode field builders. */
+#define OPC1(opcode)   ((opcode) << 30)
+#define OPC2(opcode)   ((opcode) << 22)
+#define OPC3(opcode)   ((opcode) << 19)
+/* Or-ed into an arithmetic opcode to select its flag setting form. */
+#define SET_FLAGS      OPC3(0x10)
+
+/* Instruction opcodes shared by sparc-32 and sparc-64, in alphabetical
+   order. NOP and SETHI have the same value: a NOP is a SETHI whose operand
+   fields are all zero. */
+#define ADD            (OPC1(0x2) | OPC3(0x00))
+#define ADDC           (OPC1(0x2) | OPC3(0x08))
+#define AND            (OPC1(0x2) | OPC3(0x01))
+#define ANDN           (OPC1(0x2) | OPC3(0x05))
+#define CALL           (OPC1(0x1))
+#define FABSS          (OPC1(0x2) | OPC3(0x34) | DOP(0x09))
+#define FADDD          (OPC1(0x2) | OPC3(0x34) | DOP(0x42))
+#define FADDS          (OPC1(0x2) | OPC3(0x34) | DOP(0x41))
+#define FCMPD          (OPC1(0x2) | OPC3(0x35) | DOP(0x52))
+#define FCMPS          (OPC1(0x2) | OPC3(0x35) | DOP(0x51))
+#define FDIVD          (OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
+#define FDIVS          (OPC1(0x2) | OPC3(0x34) | DOP(0x4d))
+#define FMOVS          (OPC1(0x2) | OPC3(0x34) | DOP(0x01))
+#define FMULD          (OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
+#define FMULS          (OPC1(0x2) | OPC3(0x34) | DOP(0x49))
+#define FNEGS          (OPC1(0x2) | OPC3(0x34) | DOP(0x05))
+#define FSUBD          (OPC1(0x2) | OPC3(0x34) | DOP(0x46))
+#define FSUBS          (OPC1(0x2) | OPC3(0x34) | DOP(0x45))
+#define JMPL           (OPC1(0x2) | OPC3(0x38))
+#define NOP            (OPC1(0x0) | OPC2(0x04))
+#define OR             (OPC1(0x2) | OPC3(0x02))
+#define ORN            (OPC1(0x2) | OPC3(0x06))
+#define RDY            (OPC1(0x2) | OPC3(0x28) | S1A(0))
+#define RESTORE                (OPC1(0x2) | OPC3(0x3d))
+#define SAVE           (OPC1(0x2) | OPC3(0x3c))
+#define SETHI          (OPC1(0x0) | OPC2(0x04))
+#define SLL            (OPC1(0x2) | OPC3(0x25))
+#define SLLX           (OPC1(0x2) | OPC3(0x25) | (1 << 12))
+#define SRA            (OPC1(0x2) | OPC3(0x27))
+#define SRAX           (OPC1(0x2) | OPC3(0x27) | (1 << 12))
+#define SRL            (OPC1(0x2) | OPC3(0x26))
+#define SRLX           (OPC1(0x2) | OPC3(0x26) | (1 << 12))
+#define SUB            (OPC1(0x2) | OPC3(0x04))
+#define SUBC           (OPC1(0x2) | OPC3(0x0c))
+#define TA             (OPC1(0x2) | OPC3(0x3a) | (8 << 25))
+#define WRY            (OPC1(0x2) | OPC3(0x30) | DA(0))
+#define XOR            (OPC1(0x2) | OPC3(0x03))
+#define XNOR           (OPC1(0x2) | OPC3(0x07))
+
+/* Definitions which depend on the word size. The branch displacement
+   limits and the branch, multiply and divide opcodes exist only in the
+   sparc-32 configuration. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define MAX_DISP       (0x1fffff)
+#define MIN_DISP       (-0x200000)
+#define DISP_MASK      (0x3fffff)
+
+#define BICC           (OPC1(0x0) | OPC2(0x2))
+#define FBFCC          (OPC1(0x0) | OPC2(0x6))
+#define SLL_W          SLL
+#define SDIV           (OPC1(0x2) | OPC3(0x0f))
+#define SMUL           (OPC1(0x2) | OPC3(0x0b))
+#define UDIV           (OPC1(0x2) | OPC3(0x0e))
+#define UMUL           (OPC1(0x2) | OPC3(0x0a))
+#else
+#define SLL_W          SLLX
+#endif
+
+/* Range of a signed 13 bit immediate operand (see IMM). */
+#define SIMM_MAX       (0x0fff)
+#define SIMM_MIN       (-0x1000)
+
+/* Appends the instruction word ins to the compiler buffer and increases
+   compiler->size by one.
+   delay_slot is stored in compiler->delay_slot. As the assertion shows, its
+   DST_INS_MASK part is either UNMOVABLE_INS, MOVABLE_INS, or the absolute
+   number of the destination register as encoded in bits 25-29 of ins.
+   Useful for reordering instructions in the delay slot.
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when ensure_buf
+   returns NULL. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot)
+{
+       sljit_ins *ptr;
+       SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS
+               || (delay_slot & DST_INS_MASK) == MOVABLE_INS
+               || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f));
+       ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       compiler->delay_slot = delay_slot;
+       return SLJIT_SUCCESS;
+}
+
+/* Tries to replace the long form of a jump with a single call / branch
+   instruction followed by its delay slot.
+   On entry jump->addr holds the address of the first word of the jump
+   sequence in the generated code and code_ptr the address of its last word.
+   Returns the address of the last word which remains in use: code_ptr when
+   nothing was changed, a lower address when the sequence became shorter.
+   When the jump is shortened, PATCH_CALL or PATCH_B is set in jump->flags
+   and jump->addr is set to the instruction whose displacement field is
+   filled in later by sljit_generate_code. */
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_ins *inst;
+       sljit_ins saved_inst;
+
+       /* Rewritable jumps always keep their long form. */
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return code_ptr;
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               /* label->size is a word offset from the start of the code here. */
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+       inst = (sljit_ins*)jump->addr;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       if (jump->flags & IS_CALL) {
+               /* Call is always patchable on sparc 32. */
+               jump->flags |= PATCH_CALL;
+               if (jump->flags & IS_MOVABLE) {
+                       /* The preceding instruction is moved into the delay
+                          slot, and the call takes its place. */
+                       inst[0] = inst[-1];
+                       inst[-1] = CALL;
+                       jump->addr -= sizeof(sljit_ins);
+                       return inst;
+               }
+               /* Otherwise the delay slot is filled with a nop. */
+               inst[0] = CALL;
+               inst[1] = NOP;
+               return inst + 1;
+       }
+#else
+       /* Both calls and BPr instructions shall not pass this point. */
+#error "Implementation required"
+#endif
+
+       /* NOTE(review): a conditional jump apparently starts one word earlier,
+          with the branch instruction itself - confirm against the emitter. */
+       if (jump->flags & IS_COND)
+               inst--;
+
+       if (jump->flags & IS_MOVABLE) {
+               /* The branch would take the place of the preceding instruction,
+                  so the displacement is measured from inst - 1. */
+               diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2;
+               if (diff <= MAX_DISP && diff >= MIN_DISP) {
+                       jump->flags |= PATCH_B;
+                       inst--;
+                       if (jump->flags & IS_COND) {
+                               /* Swap the branch with the instruction before it. Bit 28
+                                  of the branch is flipped (presumably this inverts the
+                                  condition - confirm). */
+                               saved_inst = inst[0];
+                               inst[0] = inst[1] ^ (1 << 28);
+                               inst[1] = saved_inst;
+                       } else {
+                               /* Branch with condition field 0x8 (branch always in the
+                                  SPARC V8 Bicc encoding); the preceding instruction
+                                  becomes its delay slot. */
+                               inst[1] = inst[0];
+                               inst[0] = BICC | DA(0x8);
+                       }
+                       jump->addr = (sljit_uw)inst;
+                       return inst + 1;
+               }
+       }
+
+       /* Same conversion without moving an instruction: the delay slot is a nop. */
+       diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+       if (diff <= MAX_DISP && diff >= MIN_DISP) {
+               jump->flags |= PATCH_B;
+               if (jump->flags & IS_COND)
+                       inst[0] ^= (1 << 28);
+               else
+                       inst[0] = BICC | DA(0x8);
+               inst[1] = NOP;
+               jump->addr = (sljit_uw)inst;
+               return inst + 1;
+       }
+
+       /* The target is out of range: the long form is kept. */
+       return code_ptr;
+}
+
+/* Copies the buffered instruction words into newly allocated executable
+   memory and resolves labels, jumps and constants.
+   First pass: the words are copied one by one; the label, jump and const
+   lists are walked in parallel and their word indexes are replaced by
+   addresses. Jumps may be shortened by detect_jump_type, so the generated
+   code can be smaller than compiler->size words.
+   Second pass: the displacement or immediate fields of every jump are
+   filled in.
+   Returns the address of the generated code; compiler->error is set to
+   SLJIT_ERR_COMPILED and compiler->executable_size to the size in bytes. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       /* code_ptr is the output position, word_count the index of the
+          current input word. They differ once a jump has been shortened. */
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               label->addr = (sljit_uw)code_ptr;
+                               /* From now on size is the word offset in the output. */
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+                               /* The recorded index belongs to the last word of the jump
+                                  sequence; jump->addr becomes the address of its first
+                                  word (3 words back on sparc 32). */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+                               jump->addr = (sljit_uw)(code_ptr - 3);
+#else
+                               jump->addr = (sljit_uw)(code_ptr - 6);
+#endif
+                               /* May move code_ptr backwards if the jump is shortened. */
+                               code_ptr = detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               /* Just recording the address. */
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr ++;
+                       word_count ++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       /* A label may be placed right after the last instruction. */
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_si)compiler->size);
+
+       /* Second pass: every label has its final address now. */
+       jump = compiler->jumps;
+       while (jump) {
+               /* do { } while (0) is used only to allow leaving with break. */
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+
+                       if (jump->flags & PATCH_CALL) {
+                               /* Word displacement stored in the low 30 bits of the call. */
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000);
+                               buf_ptr[0] = CALL | (addr & 0x3fffffff);
+                               break;
+                       }
+                       if (jump->flags & PATCH_B) {
+                               /* Word displacement stored in the DISP_MASK bits of the branch. */
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP);
+                               buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
+                               break;
+                       }
+
+                       /* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+                       buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
+#else
+#error "Implementation required"
+#endif
+               } while (0);
+               jump = jump->next;
+       }
+
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       return code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Creates an index in data_transfer_insts array.
+   The index is (flags & MEM_MASK): bit 0 selects load or store, the
+   following bits select the data size and signedness. */
+#define LOAD_DATA      0x01
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x02
+#define HALF_DATA      0x04
+#define INT_DATA       0x06
+#define SIGNED_DATA    0x08
+/* Separates integer and floating point registers.
+   Indexes up to GPR_REG use an integer register (encoded with D), the
+   indexes above it encode the register number directly (DA). */
+#define GPR_REG                0x0f
+#define DOUBLE_DATA    0x10
+
+#define MEM_MASK       0x1f
+
+/* The flags below are outside MEM_MASK, so they do not take part in the
+   data_transfer_insts index. */
+#define WRITE_BACK     0x00020
+#define ARG_TEST       0x00040
+#define ALT_KEEP_CACHE 0x00080
+#define CUMULATIVE_OP  0x00100
+#define IMM_OP         0x00200
+#define SRC2_IMM       0x00400
+
+#define REG_DEST       0x00800
+#define REG2_SOURCE    0x01000
+#define SLOW_SRC1      0x02000
+#define SLOW_SRC2      0x04000
+#define SLOW_DEST      0x08000
+
+/* SET_FLAGS (0x10 << 19) also belong here! */
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#include "sljitNativeSPARC_32.c"
+#else
+#include "sljitNativeSPARC_64.c"
+#endif
+
+/* Emits the function prologue: a save instruction which allocates the
+   stack frame, followed by moves which copy up to three incoming
+   arguments into the saved registers. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* 23 extra words are reserved on top of the requested size, then the
+          total is rounded up to a multiple of 8 bytes. */
+       local_size += 23 * sizeof(sljit_sw);
+       local_size = (local_size + 7) & ~0x7;
+       compiler->local_size = local_size;
+
+       if (local_size > SIMM_MAX) {
+               /* Does not fit into the immediate field: the negated size is
+                  passed in TMP_REG1. */
+               FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size));
+               FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS));
+       }
+       else {
+               FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS));
+       }
+
+       /* The arguments are read from registers 24, 25 and 26 (in this order). */
+       if (args < 1)
+               return SLJIT_SUCCESS;
+       FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1)));
+
+       if (args < 2)
+               return SLJIT_SUCCESS;
+       FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2)));
+
+       if (args < 3)
+               return SLJIT_SUCCESS;
+       FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3)));
+
+       return SLJIT_SUCCESS;
+}
+
+/* Records the same register counts and frame size in the compiler as
+   sljit_emit_enter does, without emitting any instruction. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* 23 extra words, then rounded up to a multiple of 8 bytes. */
+       local_size += 23 * sizeof(sljit_sw);
+       local_size = (local_size + 7) & ~0x7;
+       compiler->local_size = local_size;
+}
+
+/* Emits the function epilogue: a jmpl through register 31 with offset 8,
+   with a restore instruction in its delay slot which also copies the
+   return value into SLJIT_SCRATCH_REG1. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       /* Only a plain register move can be folded into the restore. Everything
+          else goes through emit_mov_before_return, after which the value is
+          taken from SLJIT_SCRATCH_REG1. */
+       if (!(op == SLJIT_MOV && FAST_IS_REG(src))) {
+               FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+               src = SLJIT_SCRATCH_REG1;
+       }
+
+       /* The link value of the jump is discarded (destination is register 0). */
+       FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
+       /* Delay slot: restore computes src + register 0 into SLJIT_SCRATCH_REG1. */
+       return push_inst(compiler, RESTORE | D(SLJIT_SCRATCH_REG1) | S1(src) | S2(0), UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* ARCH_32_64(a, b) selects a on sparc-32 and b on sparc-64. It is only
+   used by the table below and is undefined right after it. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define ARCH_32_64(a, b)       a
+#else
+#define ARCH_32_64(a, b)       b
+#endif
+
+/* Load / store opcodes, indexed by (flags & MEM_MASK).
+   The three letter row labels are: u / s = unsigned / signed,
+   w / b / h / i = word / byte / half / int, s / l = store / load.
+   The last four rows are the floating point transfers
+   (NOTE(review): 'd' and 's' presumably stand for double and single
+   precision - confirm). */
+static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = {
+/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* u b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */,
+/* u h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */,
+/* u i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */,
+
+/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* s b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */,
+/* s h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */,
+/* s i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */),
+
+/* d   s */ OPC1(3) | OPC3(0x27),
+/* d   l */ OPC1(3) | OPC3(0x23),
+/* s   s */ OPC1(3) | OPC3(0x24),
+/* s   l */ OPC1(3) | OPC3(0x20),
+};
+
+#undef ARCH_32_64
+
+/* Tries to perform a memory access using a single load / store instruction.
+   Returns 0 when this is not possible (nothing is emitted), 1 when it is
+   possible and ARG_TEST is set (nothing is emitted either), and -1 after
+   the instruction has been pushed. A failing push leaves through FAIL_IF. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_si gpr_access;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* Write back with a base register is never handled here. */
+       if ((flags & WRITE_BACK) && (arg & REG_MASK))
+               return 0;
+
+       if (arg & OFFS_REG_MASK) {
+               /* Register offset: only accepted when its shift amount is zero. */
+               if ((argw & 0x3) != 0)
+                       return 0;
+       }
+       else if (argw > SIMM_MAX || argw < SIMM_MIN)
+               return 0;
+
+       /* Works for both absolute and relative addresses (immediate case). */
+       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+               return 1;
+
+       gpr_access = (flags & MEM_MASK) <= GPR_REG;
+       FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
+               | (gpr_access ? D(reg) : DA(reg))
+               | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
+               (gpr_access && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
+       return -1;
+}
+
+/* Tells whether the address value computed for (arg, argw) can be reused
+   for (next_arg, next_argw). Returns 1 if so, 0 otherwise.
+   See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw delta;
+
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       /* Simple operation except for updates. */
+       if (arg & OFFS_REG_MASK) {
+               /* Same offset register with the same shift amount. */
+               argw &= 0x3;
+               SLJIT_ASSERT(argw);
+               next_argw &= 0x3;
+               return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw) ? 1 : 0;
+       }
+
+       /* The next offset is reachable with a signed 13 bit immediate. */
+       delta = next_argw - argw;
+       return (delta <= SIMM_MAX && delta >= SIMM_MIN) ? 1 : 0;
+}
+
+/* Emit the necessary instructions. See can_cache above.
+   Slow path of a memory access: the offset part of the address is first
+   placed into a register (TMP_REG3 when the value is cached, otherwise reg
+   or TMP_REG1), then the load / store itself is pushed.
+   next_arg / next_argw describe the following memory operand; they are
+   only used to decide whether the computed value is kept in TMP_REG3 and
+   recorded in compiler->cache_arg / cache_argw. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si base, arg2, delay_slot;
+       sljit_ins dest;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       /* Only a memory operand can make caching worthwhile. */
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       base = arg & REG_MASK;
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               /* Register offset with a non-zero shift: arg2 = offset << argw. */
+               argw &= 0x3;
+               SLJIT_ASSERT(argw != 0);
+
+               /* Using the cache. */
+               if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw))
+                       arg2 = TMP_REG3;
+               else {
+                       if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
+                               /* The next operand needs the same value: keep it in TMP_REG3. */
+                               compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                               compiler->cache_argw = argw;
+                               arg2 = TMP_REG3;
+                       }
+                       else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg))
+                               /* The destination of an integer load can hold the offset. */
+                               arg2 = reg;
+                       else /* It must be a mov operation, so tmp1 must be free to use. */
+                               arg2 = TMP_REG1;
+                       FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2)));
+               }
+       }
+       else {
+               /* Immediate offset which is loaded into a register. */
+               /* Using the cache. */
+               if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) {
+                       if (argw != compiler->cache_argw) {
+                               /* Adjust the cached value by the difference. */
+                               FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+                               compiler->cache_argw = argw;
+                       }
+                       arg2 = TMP_REG3;
+               } else {
+                       if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) {
+                               /* The next offset is close enough: keep this one in TMP_REG3. */
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               arg2 = TMP_REG3;
+                       }
+                       else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base)
+                               arg2 = reg;
+                       else /* It must be a mov operation, so tmp1 must be free to use. */
+                               arg2 = TMP_REG1;
+                       FAIL_IF(load_immediate(compiler, arg2, argw));
+               }
+       }
+
+       dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg));
+       delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
+       /* No base register: arg2 holds the whole address. */
+       if (!base)
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
+       if (!(flags & WRITE_BACK))
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
+       /* Write back: the base register is updated after the access. */
+       FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot));
+       return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base));
+}
+
+/* Emits a single memory access. The one instruction form is tried first;
+   otherwise the address cache is cleared and getput_arg is used. */
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg, argw)) {
+               /* There is no following operand, so nothing is cached. */
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+               return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+       }
+
+       /* The fast path either pushed the instruction or failed. */
+       return compiler->error;
+}
+
+/* Emits the memory access for arg1. The following operand (arg2) is passed
+   to getput_arg so that the address value can be cached for it. The current
+   cache content is left untouched. */
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+
+       /* The fast path either pushed the instruction or failed. */
+       return compiler->error;
+}
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{ /* Shared driver for the integer operations issued by sljit_emit_op1(),
+     sljit_emit_op2() and sljit_emit_op_flags(): resolves dst, src1 and
+     src2 to registers (or a small immediate), emits the operation through
+     emit_single_op(), and stores the result when dst is a memory operand.
+     Returns SLJIT_SUCCESS, or the error propagated by FAIL_IF /
+     compiler->error. */
+       /* arg1 goes to TMP_REG1 or src reg
+          arg2 goes to TMP_REG2, imm or src reg
+          TMP_REG3 can be used for caching
+          result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+       sljit_si dst_r = TMP_REG2;
+       sljit_si src1_r;
+       sljit_sw src2_r = 0;
+       sljit_si sugg_src2_r = TMP_REG2;
+
+       if (!(flags & ALT_KEEP_CACHE)) { /* The address cache is reset unless the caller asked to keep it. */
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) /* Unused move from a non-memory source: emit nothing. */
+                       return SLJIT_SUCCESS;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r; /* For moves, src2 is loaded straight into the destination register. */
+       }
+       else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) /* ARG_TEST probe returned zero: the final store goes through getput_arg(). */
+               flags |= SLOW_DEST;
+
+       if (flags & IMM_OP) { /* A non-zero immediate within [SIMM_MIN, SIMM_MAX] is passed on as src2_r with SRC2_IMM set. */
+               if ((src2 & SLJIT_IMM) && src2w) {
+                       if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
+                               flags |= SRC2_IMM;
+                               src2_r = src2w;
+                       }
+               }
+               if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { /* CUMULATIVE_OP allows a src1 immediate to take the src2 slot instead. */
+                       if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
+                               flags |= SRC2_IMM;
+                               src2_r = src1w;
+
+                               /* And swap arguments. */
+                               src1 = src2;
+                               src1w = src2w;
+                               src2 = SLJIT_IMM;
+                               /* src2w = src2_r unneeded. */
+                       }
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1))
+               src1_r = src1;
+       else if (src1 & SLJIT_IMM) {
+               if (src1w) {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+                       src1_r = TMP_REG1;
+               }
+               else
+                       src1_r = 0; /* Immediate zero: register 0 is used (this file also uses S1(0) as the zero operand of its OR-based moves). */
+       }
+       else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1; /* Load deferred to the getput_arg() calls below. */
+               src1_r = TMP_REG1;
+       }
+
+       /* Source 2. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       dst_r = src2_r; /* Move to a non-register dst: the source register itself is what gets stored. */
+       }
+       else if (src2 & SLJIT_IMM) {
+               if (!(flags & SRC2_IMM)) {
+                       if (src2w) {
+                               FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
+                               src2_r = sugg_src2_r;
+                       }
+                       else {
+                               src2_r = 0;
+                               if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
+                                       dst_r = 0; /* Moving immediate zero to memory: register 0 is stored directly. */
+                       }
+               }
+       }
+       else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2; /* Load deferred to the getput_arg() calls below. */
+               src2_r = sugg_src2_r;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { /* Both loads are slow: can_cache() decides which operand is loaded first. */
+               SLJIT_ASSERT(src2_r == TMP_REG2);
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       if (dst & SLJIT_MEM) { /* Store dst_r to the memory destination, on the path chosen by the earlier ARG_TEST probe. */
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, flags, dst_r, dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{ /* Emits an operation that takes no explicit operands. The multiply and
+     divide forms work on SLJIT_SCRATCH_REG1 / SLJIT_SCRATCH_REG2 and are
+     only implemented for SLJIT_CONFIG_SPARC_32. Opcodes not listed in the
+     switch emit nothing and return SLJIT_SUCCESS. */
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               return push_inst(compiler, TA, UNMOVABLE_INS);
+       case SLJIT_NOP:
+               return push_inst(compiler, NOP, UNMOVABLE_INS);
+       case SLJIT_UMUL:
+       case SLJIT_SMUL:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); /* SCRATCH_REG1 = SCRATCH_REG1 * SCRATCH_REG2. */
+               return push_inst(compiler, RDY | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); /* NOTE(review): RDY presumably reads the Y register (high word of the product) into SCRATCH_REG2 - confirm. */
+#else
+#error "Implementation required"
+#endif
+       case SLJIT_UDIV:
+       case SLJIT_SDIV:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               if (op == SLJIT_UDIV)
+                       FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); /* Unsigned: WRY is given register 0. */
+               else {
+                       FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_SCRATCH_REG1) | IMM(31), DR(TMP_REG1))); /* Signed: TMP_REG1 = SCRATCH_REG1 >> 31 (arithmetic). */
+                       FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
+               }
+               FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_SCRATCH_REG1), DR(TMP_REG2))); /* Keep a copy of the dividend in TMP_REG2. */
+               FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); /* SCRATCH_REG1 = quotient. */
+               FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_SCRATCH_REG2) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); /* SCRATCH_REG2 = quotient * divisor. */
+               FAIL_IF(push_inst(compiler, SUB | D(SLJIT_SCRATCH_REG2) | S1(TMP_REG2) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); /* SCRATCH_REG2 = dividend - quotient * divisor (the remainder). */
+               return SLJIT_SUCCESS;
+#else
+#error "Implementation required"
+#endif
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{ /* Emits a single-source operation (moves, NOT, CLZ, NEG) by delegating
+     to emit_op(). For everything except NEG the real source is passed as
+     emit_op()'s src2 and (TMP_REG1, 0) fills the src1 slot. Opcodes not
+     listed in the switch emit nothing and return SLJIT_SUCCESS. */
+       sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UI:
+               return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_SI:
+               return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UB: /* Byte/half moves: an immediate source is first cast to the target type (sljit_ub, sljit_sb, sljit_uh, sljit_sh). */
+               return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOV_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOV_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOV_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_MOVU: /* Each MOVU_* form is emitted as the matching MOV_* opcode with WRITE_BACK added to the flags. */
+       case SLJIT_MOVU_P:
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UI:
+               return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_SI:
+               return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOVU_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOVU_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOVU_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_NOT:
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG: /* Negation is emitted as SLJIT_SUB with an immediate 0 as src1, i.e. 0 - src. */
+               return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{ /* Emits a two-source integer operation by delegating to emit_op() with
+     flags chosen per opcode group. Opcodes not listed in the switch emit
+     nothing and return SLJIT_SUCCESS. */
+       sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_ADD: /* This group passes CUMULATIVE_OP, which lets emit_op() swap an immediate src1 into the src2 slot. */
+       case SLJIT_ADDC:
+       case SLJIT_MUL:
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB: /* No CUMULATIVE_OP here, so the operands are never swapped. */
+       case SLJIT_SUBC:
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+       case SLJIT_ASHR:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               if (src2 & SLJIT_IMM)
+                       src2w &= 0x1f; /* An immediate shift count is reduced to its low five bits. */
+#else
+               SLJIT_ASSERT_STOP();
+#endif
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{ /* Returns the reg_map[] entry for the sljit register reg.
+     NOTE(review): presumably the machine register number - confirm
+     against the reg_map definition. */
+       check_sljit_get_register_index(reg);
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{ /* Returns reg doubled; the floating point emitters in this file apply
+     the same "<< 1" to their register operands. */
+       check_sljit_get_float_register_index(reg);
+       return reg << 1;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{ /* Appends one caller-supplied instruction word, marked UNMOVABLE_INS.
+     size is asserted to be 4.
+     NOTE(review): the word is read through a sljit_ins pointer, so
+     instruction presumably has to be suitably aligned - confirm. */
+       CHECK_ERROR();
+       check_sljit_emit_op_custom(compiler, instruction, size);
+       SLJIT_ASSERT(size == 4);
+
+       return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{ /* Returns SLJIT_IS_FPU_AVAILABLE when that macro is defined at build
+     time, otherwise 1. */
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#else
+       /* Available by default. */
+       return 1;
+#endif
+}
+
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) /* DOUBLE_DATA, plus bit 0x2 when op has SLJIT_SINGLE_OP (0x100 >> 7); sljit_emit_fop1 asserts these bit positions. */
+#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) /* Picks the single-precision opcode when op has SLJIT_SINGLE_OP, else the double-precision one. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{ /* Emits a single-source floating point operation: SLJIT_CMPD (compare
+     dst with src), SLJIT_MOVD, SLJIT_NEGD or SLJIT_ABSD. Memory operands
+     go through TMP_FREG1 / TMP_FREG2; register operands are doubled
+     ("<< 1") before being encoded. Returns SLJIT_SUCCESS or the error
+     propagated by FAIL_IF. */
+       sljit_si dst_fr;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       if (GET_OPCODE(op) == SLJIT_CMPD) { /* Compare: both operands are inputs, nothing is stored. */
+               if (dst & SLJIT_MEM) {
+                       FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
+                       dst = TMP_FREG1;
+               }
+               else
+                       dst <<= 1;
+
+               if (src & SLJIT_MEM) {
+                       FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
+                       src = TMP_FREG2;
+               }
+               else
+                       src <<= 1;
+
+               return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS);
+       }
+
+       dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; /* TMP_FREG1 stands in for a destination that is not a register. */
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); /* A memory source is loaded straight into dst_fr. */
+               src = dst_fr;
+       }
+       else
+               src <<= 1;
+
+       switch (GET_OPCODE(op)) {
+               case SLJIT_MOVD:
+                       if (src != dst_fr && dst_fr != TMP_FREG1) { /* A copy is only emitted for a register destination that differs from src. */
+                               FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+                               if (!(op & SLJIT_SINGLE_OP))
+                                       FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); /* Double: a second FMOVS copies the "| 1" register. */
+                       }
+                       break;
+               case SLJIT_NEGD: /* FNEGS acts on the first register only; for doubles the "| 1" register is copied unchanged when src and dst differ. */
+                       FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+                       if (dst_fr != src && !(op & SLJIT_SINGLE_OP))
+                               FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
+                       break;
+               case SLJIT_ABSD: /* Same scheme as SLJIT_NEGD, using FABSS. */
+                       FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+                       if (dst_fr != src && !(op & SLJIT_SINGLE_OP))
+                               FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
+                       break;
+       }
+
+       if (dst_fr == TMP_FREG1) { /* Destination was not a register: store the result. */
+               if (GET_OPCODE(op) == SLJIT_MOVD)
+                       dst_fr = src; /* For a move, src is stored directly (no copy was emitted above). */
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{ /* Emits a two-source floating point operation (SLJIT_ADDD, SLJIT_SUBD,
+     SLJIT_MULD, SLJIT_DIVD). Memory sources are loaded into TMP_FREG1 /
+     TMP_FREG2; a destination that is not a register is computed in
+     TMP_FREG2 and stored afterwards. Returns SLJIT_SUCCESS or the error
+     propagated by FAIL_IF. */
+       sljit_si dst_fr, flags = 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+                       FAIL_IF(compiler->error);
+                       src1 = TMP_FREG1;
+               } else
+                       flags |= SLOW_SRC1; /* Load deferred to the getput_arg() calls below. */
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+                       FAIL_IF(compiler->error);
+                       src2 = TMP_FREG2;
+               } else
+                       flags |= SLOW_SRC2; /* Load deferred to the getput_arg() calls below. */
+       }
+       else
+               src2 <<= 1;
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { /* Both loads are slow: can_cache() decides which operand is loaded first. */
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+       if (flags & SLOW_SRC1) /* From here on the slow-loaded operands are referred to by their temporaries. */
+               src1 = TMP_FREG1;
+       if (flags & SLOW_SRC2)
+               src2 = TMP_FREG2;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADDD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+
+       case SLJIT_SUBD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+
+       case SLJIT_MULD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+
+       case SLJIT_DIVD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+       }
+
+       if (dst_fr == TMP_FREG2) /* Destination was not a register: store the result. */
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef FLOAT_DATA
+#undef SELECT_FOP
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{ /* Copies TMP_LINK into dst: an OR with register 0 for a register
+     destination, a WORD_DATA store for a memory one. Emits nothing when
+     dst is SLJIT_UNUSED.
+     NOTE(review): TMP_LINK presumably holds the return address written
+     by the JMPL of a fast call - confirm. */
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst));
+
+       /* Memory. */
+       return emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{ /* Loads src (register, memory or immediate) into TMP_LINK, then emits
+     a JMPL to TMP_LINK + 8 with destination register 0, followed by a
+     NOP. */
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK)));
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw));
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, TMP_LINK, srcw));
+
+       FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS));
+       return push_inst(compiler, NOP, UNMOVABLE_INS); /* NOTE(review): presumably fills the delay slot of the JMPL - confirm. */
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{ /* Returns a label for the current position. The previous label is
+     reused when it was created at the same compiler->size; otherwise a
+     new one is allocated with ensure_abuf(). The failure path goes
+     through PTR_FAIL_IF. */
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       compiler->delay_slot = UNMOVABLE_INS; /* Marks the tracked delay-slot state as unmovable at the label. */
+       return label;
+}
+
+static sljit_ins get_cc(sljit_si type)
+{ /* Maps an SLJIT_C_* condition to the condition code value, wrapped in
+     DA(), that the callers OR into a BICC (integer) or FBFCC (floating
+     point) instruction. Any other value triggers SLJIT_ASSERT_STOP() and
+     yields DA(0x8). */
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               return DA(0x1);
+
+       case SLJIT_C_NOT_EQUAL:
+       case SLJIT_C_MUL_OVERFLOW:
+               return DA(0x9);
+
+       case SLJIT_C_LESS:
+               return DA(0x5);
+
+       case SLJIT_C_GREATER_EQUAL:
+               return DA(0xd);
+
+       case SLJIT_C_GREATER:
+               return DA(0xc);
+
+       case SLJIT_C_LESS_EQUAL:
+               return DA(0x4);
+
+       case SLJIT_C_SIG_LESS:
+               return DA(0x3);
+
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               return DA(0xb);
+
+       case SLJIT_C_SIG_GREATER:
+               return DA(0xa);
+
+       case SLJIT_C_SIG_LESS_EQUAL:
+               return DA(0x2);
+
+       case SLJIT_C_OVERFLOW:
+               return DA(0x7);
+
+       case SLJIT_C_NOT_OVERFLOW:
+               return DA(0xf);
+
+       case SLJIT_C_FLOAT_EQUAL: /* The floating point cases reuse numeric values from above but are combined with FBFCC by the callers. */
+               return DA(0x9);
+
+       case SLJIT_C_FLOAT_NOT_EQUAL: /* Unordered. */
+               return DA(0x1);
+
+       case SLJIT_C_FLOAT_LESS:
+               return DA(0x4);
+
+       case SLJIT_C_FLOAT_GREATER_EQUAL: /* Unordered. */
+               return DA(0xc);
+
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               return DA(0xd);
+
+       case SLJIT_C_FLOAT_GREATER: /* Unordered. */
+               return DA(0x5);
+
+       case SLJIT_C_FLOAT_UNORDERED:
+               return DA(0x7);
+
+       case SLJIT_C_FLOAT_ORDERED:
+               return DA(0xf);
+
+       default:
+               SLJIT_ASSERT_STOP();
+               return DA(0x8);
+       }
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{ /* Emits a jump or call whose target is filled in later and returns its
+     sljit_jump record. The sequence is: for conditional types a BICC /
+     FBFCC on the condition of (type ^ 1), then emit_const() into
+     TMP_REG2, a JMPL through TMP_REG2, and a NOP. Failures go through
+     PTR_FAIL_IF. */
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff; /* Only the low eight bits of type are used from here on. */
+
+       if (type < SLJIT_C_FLOAT_EQUAL) { /* Integer condition. */
+               jump->flags |= IS_COND;
+               if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) /* IS_MOVABLE needs a movable previous instruction that does not set ICC. */
+                       jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); /* NOTE(review): the literal 5 is presumably a displacement that skips the address load and JMPL below - confirm. */
+#else
+#error "Implementation required"
+#endif
+       }
+       else if (type < SLJIT_JUMP) { /* Floating point condition: same scheme with FBFCC and FCC_IS_SET. */
+               jump->flags |= IS_COND;
+               if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET))
+                       jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
+#else
+#error "Implementation required"
+#endif
+       } else { /* Unconditional jump or call. */
+               if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+                       jump->flags |= IS_MOVABLE;
+               if (type >= SLJIT_FAST_CALL)
+                       jump->flags |= IS_CALL;
+       }
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); /* Placeholder value 0 for the target address. */
+       PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS)); /* Calls use TMP_LINK as the JMPL destination register, plain jumps use register 0. */
+       jump->addr = compiler->size; /* Recorded after the JMPL and before the NOP is pushed. */
+       PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+
+       return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{ /* Emits an indirect jump or call through src. A register source is
+     used directly; an immediate source creates a sljit_jump record with
+     JUMP_ADDR and target srcw; a memory source is loaded into TMP_REG2.
+     The JMPL is followed by a NOP. Returns SLJIT_SUCCESS or the error
+     propagated by FAIL_IF. */
+       struct sljit_jump *jump = NULL;
+       sljit_si src_r;
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               src_r = src;
+       else if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR);
+               jump->u.target = srcw;
+               if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+                       jump->flags |= IS_MOVABLE;
+               if (type >= SLJIT_FAST_CALL)
+                       jump->flags |= IS_CALL;
+
+               FAIL_IF(emit_const(compiler, TMP_REG2, 0)); /* Placeholder value 0; the real target is kept in jump->u.target. */
+               src_r = TMP_REG2;
+       }
+       else {
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
+               src_r = TMP_REG2;
+       }
+
+       FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS)); /* Calls use TMP_LINK as the JMPL destination register, plain jumps use register 0. */
+       if (jump)
+               jump->addr = compiler->size; /* Only the immediate form has a jump record to update. */
+       return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{ /* Turns the condition type into a 0/1 value. When op is below
+     SLJIT_ADD the value is written to dst; otherwise it is combined with
+     src through emit_op() using op. Emits nothing when dst is
+     SLJIT_UNUSED. Only implemented for SLJIT_CONFIG_SPARC_32. */
+       sljit_si reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0);
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       op = GET_OPCODE(op);
+       reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; /* The flag value is built in dst itself only for a move into a register. */
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { /* A memory source is loaded into TMP_REG1 before the flag sequence is emitted. */
+               ADJUST_LOCAL_OFFSET(src, srcw);
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       if (type < SLJIT_C_FLOAT_EQUAL)
+               FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); /* NOTE(review): presumably branches over the "reg = 0" below when the condition holds, with "reg = 1" in the delay slot - confirm. */
+       else
+               FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS));
+
+       FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); /* reg = 1 */
+       FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); /* reg = 0 */
+
+       if (op >= SLJIT_ADD) /* Here reg is always TMP_REG2, which becomes src2 of the operation. */
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+
+       return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; /* Store only when the value was not built in dst directly. */
+#else
+#error "Implementation required"
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{ /* Allocates a sljit_const record and emits, through emit_const(), a
+     load of init_value into dst. A memory destination is served by
+     loading into TMP_REG2 and storing that register. Returns the record;
+     failures go through PTR_FAIL_IF. */
+       sljit_si reg;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+       return const_;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeTILEGX-encoder.c b/ext/pcre/pcrelib/sljit/sljitNativeTILEGX-encoder.c
new file mode 100644 (file)
index 0000000..7196329
--- /dev/null
@@ -0,0 +1,10159 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This code is owned by Tilera Corporation, and distributed as part
+   of multiple projects. In sljit, the code is under BSD licence.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define BFD_RELOC(x) R_##x /* BFD_RELOC(FOO) token-pastes to the identifier R_FOO */
+
+/* Special registers: each value is the index of the entry with the
+   matching name ("lr", "sn", "zero") in tilegx_register_names[]. */
+#define TREG_LR 55
+#define TREG_SN 56
+#define TREG_ZERO 63
+
+/* Canonical name of each register, indexed by register number.
+   64 entries: "r0".."r52" at indices 0..52, then "tp", "sp", "lr", "sn",
+   "idn0", "idn1", "udn0".."udn3" and finally "zero" at index 63. */
+const char *const tilegx_register_names[] =
+{
+  "r0",   "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+  "r8",   "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+  "r16",  "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+  "r24",  "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+  "r32",  "r33", "r34", "r35", "r36", "r37", "r38", "r39",
+  "r40",  "r41", "r42", "r43", "r44", "r45", "r46", "r47",
+  "r48",  "r49", "r50", "r51", "r52", "tp",  "sp",  "lr",
+  "sn",  "idn0", "idn1", "udn0", "udn1", "udn2", "udn3", "zero"
+};
+
+enum /* Relocation type codes (R_TILEGX_*); BFD_RELOC(x) expands to these
+        R_x names. The numbering is not contiguous: 76-77, 90-91, 104-105
+        and 122-127 are not assigned in this list. R_NONE and R_TILEGX_NONE
+        are both 0; R_TILEGX_NUM (131) is one more than the largest code. */
+{
+  R_NONE = 0,
+  R_TILEGX_NONE = 0,
+  R_TILEGX_64 = 1,
+  R_TILEGX_32 = 2,
+  R_TILEGX_16 = 3,
+  R_TILEGX_8 = 4,
+  R_TILEGX_64_PCREL = 5,
+  R_TILEGX_32_PCREL = 6,
+  R_TILEGX_16_PCREL = 7,
+  R_TILEGX_8_PCREL = 8,
+  R_TILEGX_HW0 = 9,
+  R_TILEGX_HW1 = 10,
+  R_TILEGX_HW2 = 11,
+  R_TILEGX_HW3 = 12,
+  R_TILEGX_HW0_LAST = 13,
+  R_TILEGX_HW1_LAST = 14,
+  R_TILEGX_HW2_LAST = 15,
+  R_TILEGX_COPY = 16,
+  R_TILEGX_GLOB_DAT = 17,
+  R_TILEGX_JMP_SLOT = 18,
+  R_TILEGX_RELATIVE = 19,
+  R_TILEGX_BROFF_X1 = 20,
+  R_TILEGX_JUMPOFF_X1 = 21,
+  R_TILEGX_JUMPOFF_X1_PLT = 22,
+  R_TILEGX_IMM8_X0 = 23,
+  R_TILEGX_IMM8_Y0 = 24,
+  R_TILEGX_IMM8_X1 = 25,
+  R_TILEGX_IMM8_Y1 = 26,
+  R_TILEGX_DEST_IMM8_X1 = 27,
+  R_TILEGX_MT_IMM14_X1 = 28,
+  R_TILEGX_MF_IMM14_X1 = 29,
+  R_TILEGX_MMSTART_X0 = 30,
+  R_TILEGX_MMEND_X0 = 31,
+  R_TILEGX_SHAMT_X0 = 32,
+  R_TILEGX_SHAMT_X1 = 33,
+  R_TILEGX_SHAMT_Y0 = 34,
+  R_TILEGX_SHAMT_Y1 = 35,
+  R_TILEGX_IMM16_X0_HW0 = 36,
+  R_TILEGX_IMM16_X1_HW0 = 37,
+  R_TILEGX_IMM16_X0_HW1 = 38,
+  R_TILEGX_IMM16_X1_HW1 = 39,
+  R_TILEGX_IMM16_X0_HW2 = 40,
+  R_TILEGX_IMM16_X1_HW2 = 41,
+  R_TILEGX_IMM16_X0_HW3 = 42,
+  R_TILEGX_IMM16_X1_HW3 = 43,
+  R_TILEGX_IMM16_X0_HW0_LAST = 44,
+  R_TILEGX_IMM16_X1_HW0_LAST = 45,
+  R_TILEGX_IMM16_X0_HW1_LAST = 46,
+  R_TILEGX_IMM16_X1_HW1_LAST = 47,
+  R_TILEGX_IMM16_X0_HW2_LAST = 48,
+  R_TILEGX_IMM16_X1_HW2_LAST = 49,
+  R_TILEGX_IMM16_X0_HW0_PCREL = 50,
+  R_TILEGX_IMM16_X1_HW0_PCREL = 51,
+  R_TILEGX_IMM16_X0_HW1_PCREL = 52,
+  R_TILEGX_IMM16_X1_HW1_PCREL = 53,
+  R_TILEGX_IMM16_X0_HW2_PCREL = 54,
+  R_TILEGX_IMM16_X1_HW2_PCREL = 55,
+  R_TILEGX_IMM16_X0_HW3_PCREL = 56,
+  R_TILEGX_IMM16_X1_HW3_PCREL = 57,
+  R_TILEGX_IMM16_X0_HW0_LAST_PCREL = 58,
+  R_TILEGX_IMM16_X1_HW0_LAST_PCREL = 59,
+  R_TILEGX_IMM16_X0_HW1_LAST_PCREL = 60,
+  R_TILEGX_IMM16_X1_HW1_LAST_PCREL = 61,
+  R_TILEGX_IMM16_X0_HW2_LAST_PCREL = 62,
+  R_TILEGX_IMM16_X1_HW2_LAST_PCREL = 63,
+  R_TILEGX_IMM16_X0_HW0_GOT = 64,
+  R_TILEGX_IMM16_X1_HW0_GOT = 65,
+
+  R_TILEGX_IMM16_X0_HW0_PLT_PCREL = 66,
+  R_TILEGX_IMM16_X1_HW0_PLT_PCREL = 67,
+  R_TILEGX_IMM16_X0_HW1_PLT_PCREL = 68,
+  R_TILEGX_IMM16_X1_HW1_PLT_PCREL = 69,
+  R_TILEGX_IMM16_X0_HW2_PLT_PCREL = 70,
+  R_TILEGX_IMM16_X1_HW2_PLT_PCREL = 71,
+
+  R_TILEGX_IMM16_X0_HW0_LAST_GOT = 72,
+  R_TILEGX_IMM16_X1_HW0_LAST_GOT = 73,
+  R_TILEGX_IMM16_X0_HW1_LAST_GOT = 74,
+  R_TILEGX_IMM16_X1_HW1_LAST_GOT = 75,
+  R_TILEGX_IMM16_X0_HW0_TLS_GD = 78,
+  R_TILEGX_IMM16_X1_HW0_TLS_GD = 79,
+  R_TILEGX_IMM16_X0_HW0_TLS_LE = 80,
+  R_TILEGX_IMM16_X1_HW0_TLS_LE = 81,
+  R_TILEGX_IMM16_X0_HW0_LAST_TLS_LE = 82,
+  R_TILEGX_IMM16_X1_HW0_LAST_TLS_LE = 83,
+  R_TILEGX_IMM16_X0_HW1_LAST_TLS_LE = 84,
+  R_TILEGX_IMM16_X1_HW1_LAST_TLS_LE = 85,
+  R_TILEGX_IMM16_X0_HW0_LAST_TLS_GD = 86,
+  R_TILEGX_IMM16_X1_HW0_LAST_TLS_GD = 87,
+  R_TILEGX_IMM16_X0_HW1_LAST_TLS_GD = 88,
+  R_TILEGX_IMM16_X1_HW1_LAST_TLS_GD = 89,
+  R_TILEGX_IMM16_X0_HW0_TLS_IE = 92,
+  R_TILEGX_IMM16_X1_HW0_TLS_IE = 93,
+
+  R_TILEGX_IMM16_X0_HW0_LAST_PLT_PCREL = 94,
+  R_TILEGX_IMM16_X1_HW0_LAST_PLT_PCREL = 95,
+  R_TILEGX_IMM16_X0_HW1_LAST_PLT_PCREL = 96,
+  R_TILEGX_IMM16_X1_HW1_LAST_PLT_PCREL = 97,
+  R_TILEGX_IMM16_X0_HW2_LAST_PLT_PCREL = 98,
+  R_TILEGX_IMM16_X1_HW2_LAST_PLT_PCREL = 99,
+
+  R_TILEGX_IMM16_X0_HW0_LAST_TLS_IE = 100,
+  R_TILEGX_IMM16_X1_HW0_LAST_TLS_IE = 101,
+  R_TILEGX_IMM16_X0_HW1_LAST_TLS_IE = 102,
+  R_TILEGX_IMM16_X1_HW1_LAST_TLS_IE = 103,
+  R_TILEGX_TLS_DTPMOD64 = 106,
+  R_TILEGX_TLS_DTPOFF64 = 107,
+  R_TILEGX_TLS_TPOFF64 = 108,
+  R_TILEGX_TLS_DTPMOD32 = 109,
+  R_TILEGX_TLS_DTPOFF32 = 110,
+  R_TILEGX_TLS_TPOFF32 = 111,
+  R_TILEGX_TLS_GD_CALL = 112,
+  R_TILEGX_IMM8_X0_TLS_GD_ADD = 113,
+  R_TILEGX_IMM8_X1_TLS_GD_ADD = 114,
+  R_TILEGX_IMM8_Y0_TLS_GD_ADD = 115,
+  R_TILEGX_IMM8_Y1_TLS_GD_ADD = 116,
+  R_TILEGX_TLS_IE_LOAD = 117,
+  R_TILEGX_IMM8_X0_TLS_ADD = 118,
+  R_TILEGX_IMM8_X1_TLS_ADD = 119,
+  R_TILEGX_IMM8_Y0_TLS_ADD = 120,
+  R_TILEGX_IMM8_Y1_TLS_ADD = 121,
+  R_TILEGX_GNU_VTINHERIT = 128,
+  R_TILEGX_GNU_VTENTRY = 129,
+  R_TILEGX_IRELATIVE = 130,
+  R_TILEGX_NUM = 131
+};
+
+typedef enum /* The five pipelines; enumerators take the default values 0..4. */
+{
+  TILEGX_PIPELINE_X0,
+  TILEGX_PIPELINE_X1,
+  TILEGX_PIPELINE_Y0,
+  TILEGX_PIPELINE_Y1,
+  TILEGX_PIPELINE_Y2,
+} tilegx_pipeline;
+
+typedef unsigned long long tilegx_bundle_bits; /* holds one encoded bundle */
+
+/* These are the bits that determine if a bundle is in the X encoding:
+   the mask selects bits 62 and 63 of the bundle word. */
+#define TILEGX_BUNDLE_MODE_MASK ((tilegx_bundle_bits)3 << 62)
+
+enum /* Bundle geometry and register-file size constants. */
+{
+  /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */
+  TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+  /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2.
+     This equals the number of tilegx_pipeline enumerators. */
+  TILEGX_NUM_PIPELINE_ENCODINGS = 5,
+
+  /* Log base 2 of TILEGX_BUNDLE_SIZE_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+
+  /* A bundle occupies this many bytes (1 << 3 == 8). */
+  TILEGX_BUNDLE_SIZE_IN_BYTES = 1 << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES,
+
+  /* Log base 2 of TILEGX_BUNDLE_ALIGNMENT_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+
+  /* Bundles should be aligned modulo this number of bytes (1 << 3 == 8). */
+  TILEGX_BUNDLE_ALIGNMENT_IN_BYTES =
+    (1 << TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES),
+
+  /* Number of registers (some are magic, such as network I/O).
+     Same as the number of entries in tilegx_register_names[]. */
+  TILEGX_NUM_REGISTERS = 64,
+};
+
+/* Architecture-neutral "tile_" aliases (one typedef plus macros) for the
+   TILE-Gx definitions above, to simplify common code between
+   architectures.  */
+
+typedef tilegx_bundle_bits tile_bundle_bits;
+#define TILE_BUNDLE_SIZE_IN_BYTES TILEGX_BUNDLE_SIZE_IN_BYTES
+#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
+  TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+
+/* 64-bit pattern for a { bpt ; nop } bundle, as a tilegx_bundle_bits-sized
+   unsigned constant. */
+#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL
+
+typedef enum /* Kind of an operand; stored in the type field of struct tilegx_operand. */
+{
+  TILEGX_OP_TYPE_REGISTER,
+  TILEGX_OP_TYPE_IMMEDIATE,
+  TILEGX_OP_TYPE_ADDRESS,
+  TILEGX_OP_TYPE_SPR
+} tilegx_operand_type;
+
+struct tilegx_operand /* Description of one instruction operand. */
+{
+  /* Is this operand a register, immediate or address?
+     (tilegx_operand_type also has an SPR kind.) */
+  tilegx_operand_type type;
+
+  /* The default relocation type for this operand, kept in a signed
+     16-bit field (presumably one of the R_TILEGX_* codes -- confirm).  */
+  signed int default_reloc : 16;
+
+  /* How many bits is this value? (used for range checking)
+     The 5-bit field can represent 0..31. */
+  unsigned int num_bits : 5;
+
+  /* Is the value signed? (used for range checking) */
+  unsigned int is_signed : 1;
+
+  /* Is this operand a source register? */
+  unsigned int is_src_reg : 1;
+
+  /* Is this operand written? (i.e. is it a destination register) */
+  unsigned int is_dest_reg : 1;
+
+  /* Is this operand PC-relative? */
+  unsigned int is_pc_relative : 1;
+
+  /* By how many bits do we right shift the value before inserting?
+     The 2-bit field can represent 0..3. */
+  unsigned int rightshift : 2;
+
+  /* Return the bits for this operand to be ORed into an existing bundle;
+     op is the operand value to encode. */
+  tilegx_bundle_bits (*insert) (int op);
+
+  /* Extract this operand from the given bundle and return it. */
+  unsigned int (*extract) (tilegx_bundle_bits bundle);
+};
+
+typedef enum /* One enumerator per instruction mnemonic, with default values
+                starting at 0. TILEGX_OPC_NONE is listed last, so its value
+                equals the number of mnemonics that precede it. */
+{
+  TILEGX_OPC_BPT,
+  TILEGX_OPC_INFO,
+  TILEGX_OPC_INFOL,
+  TILEGX_OPC_LD4S_TLS,
+  TILEGX_OPC_LD_TLS,
+  TILEGX_OPC_MOVE,
+  TILEGX_OPC_MOVEI,
+  TILEGX_OPC_MOVELI,
+  TILEGX_OPC_PREFETCH,
+  TILEGX_OPC_PREFETCH_ADD_L1,
+  TILEGX_OPC_PREFETCH_ADD_L1_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L2,
+  TILEGX_OPC_PREFETCH_ADD_L2_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_L1,
+  TILEGX_OPC_PREFETCH_L1_FAULT,
+  TILEGX_OPC_PREFETCH_L2,
+  TILEGX_OPC_PREFETCH_L2_FAULT,
+  TILEGX_OPC_PREFETCH_L3,
+  TILEGX_OPC_PREFETCH_L3_FAULT,
+  TILEGX_OPC_RAISE,
+  TILEGX_OPC_ADD,
+  TILEGX_OPC_ADDI,
+  TILEGX_OPC_ADDLI,
+  TILEGX_OPC_ADDX,
+  TILEGX_OPC_ADDXI,
+  TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXSC,
+  TILEGX_OPC_AND,
+  TILEGX_OPC_ANDI,
+  TILEGX_OPC_BEQZ,
+  TILEGX_OPC_BEQZT,
+  TILEGX_OPC_BFEXTS,
+  TILEGX_OPC_BFEXTU,
+  TILEGX_OPC_BFINS,
+  TILEGX_OPC_BGEZ,
+  TILEGX_OPC_BGEZT,
+  TILEGX_OPC_BGTZ,
+  TILEGX_OPC_BGTZT,
+  TILEGX_OPC_BLBC,
+  TILEGX_OPC_BLBCT,
+  TILEGX_OPC_BLBS,
+  TILEGX_OPC_BLBST,
+  TILEGX_OPC_BLEZ,
+  TILEGX_OPC_BLEZT,
+  TILEGX_OPC_BLTZ,
+  TILEGX_OPC_BLTZT,
+  TILEGX_OPC_BNEZ,
+  TILEGX_OPC_BNEZT,
+  TILEGX_OPC_CLZ,
+  TILEGX_OPC_CMOVEQZ,
+  TILEGX_OPC_CMOVNEZ,
+  TILEGX_OPC_CMPEQ,
+  TILEGX_OPC_CMPEQI,
+  TILEGX_OPC_CMPEXCH,
+  TILEGX_OPC_CMPEXCH4,
+  TILEGX_OPC_CMPLES,
+  TILEGX_OPC_CMPLEU,
+  TILEGX_OPC_CMPLTS,
+  TILEGX_OPC_CMPLTSI,
+  TILEGX_OPC_CMPLTU,
+  TILEGX_OPC_CMPLTUI,
+  TILEGX_OPC_CMPNE,
+  TILEGX_OPC_CMUL,
+  TILEGX_OPC_CMULA,
+  TILEGX_OPC_CMULAF,
+  TILEGX_OPC_CMULF,
+  TILEGX_OPC_CMULFR,
+  TILEGX_OPC_CMULH,
+  TILEGX_OPC_CMULHR,
+  TILEGX_OPC_CRC32_32,
+  TILEGX_OPC_CRC32_8,
+  TILEGX_OPC_CTZ,
+  TILEGX_OPC_DBLALIGN,
+  TILEGX_OPC_DBLALIGN2,
+  TILEGX_OPC_DBLALIGN4,
+  TILEGX_OPC_DBLALIGN6,
+  TILEGX_OPC_DRAIN,
+  TILEGX_OPC_DTLBPR,
+  TILEGX_OPC_EXCH,
+  TILEGX_OPC_EXCH4,
+  TILEGX_OPC_FDOUBLE_ADD_FLAGS,
+  TILEGX_OPC_FDOUBLE_ADDSUB,
+  TILEGX_OPC_FDOUBLE_MUL_FLAGS,
+  TILEGX_OPC_FDOUBLE_PACK1,
+  TILEGX_OPC_FDOUBLE_PACK2,
+  TILEGX_OPC_FDOUBLE_SUB_FLAGS,
+  TILEGX_OPC_FDOUBLE_UNPACK_MAX,
+  TILEGX_OPC_FDOUBLE_UNPACK_MIN,
+  TILEGX_OPC_FETCHADD,
+  TILEGX_OPC_FETCHADD4,
+  TILEGX_OPC_FETCHADDGEZ,
+  TILEGX_OPC_FETCHADDGEZ4,
+  TILEGX_OPC_FETCHAND,
+  TILEGX_OPC_FETCHAND4,
+  TILEGX_OPC_FETCHOR,
+  TILEGX_OPC_FETCHOR4,
+  TILEGX_OPC_FINV,
+  TILEGX_OPC_FLUSH,
+  TILEGX_OPC_FLUSHWB,
+  TILEGX_OPC_FNOP,
+  TILEGX_OPC_FSINGLE_ADD1,
+  TILEGX_OPC_FSINGLE_ADDSUB2,
+  TILEGX_OPC_FSINGLE_MUL1,
+  TILEGX_OPC_FSINGLE_MUL2,
+  TILEGX_OPC_FSINGLE_PACK1,
+  TILEGX_OPC_FSINGLE_PACK2,
+  TILEGX_OPC_FSINGLE_SUB1,
+  TILEGX_OPC_ICOH,
+  TILEGX_OPC_ILL,
+  TILEGX_OPC_INV,
+  TILEGX_OPC_IRET,
+  TILEGX_OPC_J,
+  TILEGX_OPC_JAL,
+  TILEGX_OPC_JALR,
+  TILEGX_OPC_JALRP,
+  TILEGX_OPC_JR,
+  TILEGX_OPC_JRP,
+  TILEGX_OPC_LD,
+  TILEGX_OPC_LD1S,
+  TILEGX_OPC_LD1S_ADD,
+  TILEGX_OPC_LD1U,
+  TILEGX_OPC_LD1U_ADD,
+  TILEGX_OPC_LD2S,
+  TILEGX_OPC_LD2S_ADD,
+  TILEGX_OPC_LD2U,
+  TILEGX_OPC_LD2U_ADD,
+  TILEGX_OPC_LD4S,
+  TILEGX_OPC_LD4S_ADD,
+  TILEGX_OPC_LD4U,
+  TILEGX_OPC_LD4U_ADD,
+  TILEGX_OPC_LD_ADD,
+  TILEGX_OPC_LDNA,
+  TILEGX_OPC_LDNA_ADD,
+  TILEGX_OPC_LDNT,
+  TILEGX_OPC_LDNT1S,
+  TILEGX_OPC_LDNT1S_ADD,
+  TILEGX_OPC_LDNT1U,
+  TILEGX_OPC_LDNT1U_ADD,
+  TILEGX_OPC_LDNT2S,
+  TILEGX_OPC_LDNT2S_ADD,
+  TILEGX_OPC_LDNT2U,
+  TILEGX_OPC_LDNT2U_ADD,
+  TILEGX_OPC_LDNT4S,
+  TILEGX_OPC_LDNT4S_ADD,
+  TILEGX_OPC_LDNT4U,
+  TILEGX_OPC_LDNT4U_ADD,
+  TILEGX_OPC_LDNT_ADD,
+  TILEGX_OPC_LNK,
+  TILEGX_OPC_MF,
+  TILEGX_OPC_MFSPR,
+  TILEGX_OPC_MM,
+  TILEGX_OPC_MNZ,
+  TILEGX_OPC_MTSPR,
+  TILEGX_OPC_MUL_HS_HS,
+  TILEGX_OPC_MUL_HS_HU,
+  TILEGX_OPC_MUL_HS_LS,
+  TILEGX_OPC_MUL_HS_LU,
+  TILEGX_OPC_MUL_HU_HU,
+  TILEGX_OPC_MUL_HU_LS,
+  TILEGX_OPC_MUL_HU_LU,
+  TILEGX_OPC_MUL_LS_LS,
+  TILEGX_OPC_MUL_LS_LU,
+  TILEGX_OPC_MUL_LU_LU,
+  TILEGX_OPC_MULA_HS_HS,
+  TILEGX_OPC_MULA_HS_HU,
+  TILEGX_OPC_MULA_HS_LS,
+  TILEGX_OPC_MULA_HS_LU,
+  TILEGX_OPC_MULA_HU_HU,
+  TILEGX_OPC_MULA_HU_LS,
+  TILEGX_OPC_MULA_HU_LU,
+  TILEGX_OPC_MULA_LS_LS,
+  TILEGX_OPC_MULA_LS_LU,
+  TILEGX_OPC_MULA_LU_LU,
+  TILEGX_OPC_MULAX,
+  TILEGX_OPC_MULX,
+  TILEGX_OPC_MZ,
+  TILEGX_OPC_NAP,
+  TILEGX_OPC_NOP,
+  TILEGX_OPC_NOR,
+  TILEGX_OPC_OR,
+  TILEGX_OPC_ORI,
+  TILEGX_OPC_PCNT,
+  TILEGX_OPC_REVBITS,
+  TILEGX_OPC_REVBYTES,
+  TILEGX_OPC_ROTL,
+  TILEGX_OPC_ROTLI,
+  TILEGX_OPC_SHL,
+  TILEGX_OPC_SHL16INSLI,
+  TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADDX,
+  TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADDX,
+  TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADDX,
+  TILEGX_OPC_SHLI,
+  TILEGX_OPC_SHLX,
+  TILEGX_OPC_SHLXI,
+  TILEGX_OPC_SHRS,
+  TILEGX_OPC_SHRSI,
+  TILEGX_OPC_SHRU,
+  TILEGX_OPC_SHRUI,
+  TILEGX_OPC_SHRUX,
+  TILEGX_OPC_SHRUXI,
+  TILEGX_OPC_SHUFFLEBYTES,
+  TILEGX_OPC_ST,
+  TILEGX_OPC_ST1,
+  TILEGX_OPC_ST1_ADD,
+  TILEGX_OPC_ST2,
+  TILEGX_OPC_ST2_ADD,
+  TILEGX_OPC_ST4,
+  TILEGX_OPC_ST4_ADD,
+  TILEGX_OPC_ST_ADD,
+  TILEGX_OPC_STNT,
+  TILEGX_OPC_STNT1,
+  TILEGX_OPC_STNT1_ADD,
+  TILEGX_OPC_STNT2,
+  TILEGX_OPC_STNT2_ADD,
+  TILEGX_OPC_STNT4,
+  TILEGX_OPC_STNT4_ADD,
+  TILEGX_OPC_STNT_ADD,
+  TILEGX_OPC_SUB,
+  TILEGX_OPC_SUBX,
+  TILEGX_OPC_SUBXSC,
+  TILEGX_OPC_SWINT0,
+  TILEGX_OPC_SWINT1,
+  TILEGX_OPC_SWINT2,
+  TILEGX_OPC_SWINT3,
+  TILEGX_OPC_TBLIDXB0,
+  TILEGX_OPC_TBLIDXB1,
+  TILEGX_OPC_TBLIDXB2,
+  TILEGX_OPC_TBLIDXB3,
+  TILEGX_OPC_V1ADD,
+  TILEGX_OPC_V1ADDI,
+  TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADIFFU,
+  TILEGX_OPC_V1AVGU,
+  TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPEQI,
+  TILEGX_OPC_V1CMPLES,
+  TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTSI,
+  TILEGX_OPC_V1CMPLTU,
+  TILEGX_OPC_V1CMPLTUI,
+  TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1DDOTPU,
+  TILEGX_OPC_V1DDOTPUA,
+  TILEGX_OPC_V1DDOTPUS,
+  TILEGX_OPC_V1DDOTPUSA,
+  TILEGX_OPC_V1DOTP,
+  TILEGX_OPC_V1DOTPA,
+  TILEGX_OPC_V1DOTPU,
+  TILEGX_OPC_V1DOTPUA,
+  TILEGX_OPC_V1DOTPUS,
+  TILEGX_OPC_V1DOTPUSA,
+  TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_L,
+  TILEGX_OPC_V1MAXU,
+  TILEGX_OPC_V1MAXUI,
+  TILEGX_OPC_V1MINU,
+  TILEGX_OPC_V1MINUI,
+  TILEGX_OPC_V1MNZ,
+  TILEGX_OPC_V1MULTU,
+  TILEGX_OPC_V1MULU,
+  TILEGX_OPC_V1MULUS,
+  TILEGX_OPC_V1MZ,
+  TILEGX_OPC_V1SADAU,
+  TILEGX_OPC_V1SADU,
+  TILEGX_OPC_V1SHL,
+  TILEGX_OPC_V1SHLI,
+  TILEGX_OPC_V1SHRS,
+  TILEGX_OPC_V1SHRSI,
+  TILEGX_OPC_V1SHRU,
+  TILEGX_OPC_V1SHRUI,
+  TILEGX_OPC_V1SUB,
+  TILEGX_OPC_V1SUBUC,
+  TILEGX_OPC_V2ADD,
+  TILEGX_OPC_V2ADDI,
+  TILEGX_OPC_V2ADDSC,
+  TILEGX_OPC_V2ADIFFS,
+  TILEGX_OPC_V2AVGS,
+  TILEGX_OPC_V2CMPEQ,
+  TILEGX_OPC_V2CMPEQI,
+  TILEGX_OPC_V2CMPLES,
+  TILEGX_OPC_V2CMPLEU,
+  TILEGX_OPC_V2CMPLTS,
+  TILEGX_OPC_V2CMPLTSI,
+  TILEGX_OPC_V2CMPLTU,
+  TILEGX_OPC_V2CMPLTUI,
+  TILEGX_OPC_V2CMPNE,
+  TILEGX_OPC_V2DOTP,
+  TILEGX_OPC_V2DOTPA,
+  TILEGX_OPC_V2INT_H,
+  TILEGX_OPC_V2INT_L,
+  TILEGX_OPC_V2MAXS,
+  TILEGX_OPC_V2MAXSI,
+  TILEGX_OPC_V2MINS,
+  TILEGX_OPC_V2MINSI,
+  TILEGX_OPC_V2MNZ,
+  TILEGX_OPC_V2MULFSC,
+  TILEGX_OPC_V2MULS,
+  TILEGX_OPC_V2MULTS,
+  TILEGX_OPC_V2MZ,
+  TILEGX_OPC_V2PACKH,
+  TILEGX_OPC_V2PACKL,
+  TILEGX_OPC_V2PACKUC,
+  TILEGX_OPC_V2SADAS,
+  TILEGX_OPC_V2SADAU,
+  TILEGX_OPC_V2SADS,
+  TILEGX_OPC_V2SADU,
+  TILEGX_OPC_V2SHL,
+  TILEGX_OPC_V2SHLI,
+  TILEGX_OPC_V2SHLSC,
+  TILEGX_OPC_V2SHRS,
+  TILEGX_OPC_V2SHRSI,
+  TILEGX_OPC_V2SHRU,
+  TILEGX_OPC_V2SHRUI,
+  TILEGX_OPC_V2SUB,
+  TILEGX_OPC_V2SUBSC,
+  TILEGX_OPC_V4ADD,
+  TILEGX_OPC_V4ADDSC,
+  TILEGX_OPC_V4INT_H,
+  TILEGX_OPC_V4INT_L,
+  TILEGX_OPC_V4PACKSC,
+  TILEGX_OPC_V4SHL,
+  TILEGX_OPC_V4SHLSC,
+  TILEGX_OPC_V4SHRS,
+  TILEGX_OPC_V4SHRU,
+  TILEGX_OPC_V4SUB,
+  TILEGX_OPC_V4SUBSC,
+  TILEGX_OPC_WH64,
+  TILEGX_OPC_XOR,
+  TILEGX_OPC_XORI,
+  TILEGX_OPC_NONE
+} tilegx_mnemonic;
+
+enum /* Sizes the per-instruction operand arrays in the structs below. */
+{
+  TILEGX_MAX_OPERANDS = 4 /* bfexts -- presumably named as an instruction that uses all four; confirm */
+};
+
+struct tilegx_opcode /* Static description of one instruction. */
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tilegx_mnemonic mnemonic;
+
+  /* A bit mask of which of the five pipes this instruction
+     is compatible with (each bit is 1 << the tilegx_pipeline value):
+     X0  0x01
+     X1  0x02
+     Y0  0x04
+     Y1  0x08
+     Y2  0x10 */
+  unsigned char pipes;
+
+  /* How many operands are there? (a row of operands[][] below holds at
+     most TILEGX_MAX_OPERANDS) */
+  unsigned char num_operands;
+
+  /* Which register does this write implicitly, or TREG_ZERO if none? */
+  unsigned char implicitly_written_register;
+
+  /* Can this be bundled with other instructions (almost always true). */
+  unsigned char can_bundle;
+
+  /* The description of the operands. Each of these is an
+   * index into the tilegx_operands[] table. There is one row per
+   * pipeline encoding; presumably the row index is a tilegx_pipeline
+   * value -- confirm against the users of this table. */
+  unsigned char operands[TILEGX_NUM_PIPELINE_ENCODINGS][TILEGX_MAX_OPERANDS];
+
+  /* A mask of which bits have predefined values for each pipeline.
+   * This is useful for disassembly. */
+  tilegx_bundle_bits fixed_bit_masks[TILEGX_NUM_PIPELINE_ENCODINGS];
+
+  /* For each bit set in fixed_bit_masks, what the value is for this
+   * instruction. */
+  tilegx_bundle_bits fixed_bit_values[TILEGX_NUM_PIPELINE_ENCODINGS];
+};
+
+/* Used for non-textual disassembly into structs: pairs an opcode entry
+   with up to TILEGX_MAX_OPERANDS operand descriptors and their values. */
+struct tilegx_decoded_instruction
+{
+  const struct tilegx_opcode *opcode;
+  const struct tilegx_operand *operands[TILEGX_MAX_OPERANDS];
+  long long operand_values[TILEGX_MAX_OPERANDS];
+};
+
+enum
+{
+  ADDI_IMM8_OPCODE_X0 = 1,
+  ADDI_IMM8_OPCODE_X1 = 1,
+  ADDI_OPCODE_Y0 = 0,
+  ADDI_OPCODE_Y1 = 1,
+  ADDLI_OPCODE_X0 = 1,
+  ADDLI_OPCODE_X1 = 0,
+  ADDXI_IMM8_OPCODE_X0 = 2,
+  ADDXI_IMM8_OPCODE_X1 = 2,
+  ADDXI_OPCODE_Y0 = 1,
+  ADDXI_OPCODE_Y1 = 2,
+  ADDXLI_OPCODE_X0 = 2,
+  ADDXLI_OPCODE_X1 = 1,
+  ADDXSC_RRR_0_OPCODE_X0 = 1,
+  ADDXSC_RRR_0_OPCODE_X1 = 1,
+  ADDX_RRR_0_OPCODE_X0 = 2,
+  ADDX_RRR_0_OPCODE_X1 = 2,
+  ADDX_RRR_0_OPCODE_Y0 = 0,
+  ADDX_SPECIAL_0_OPCODE_Y1 = 0,
+  ADD_RRR_0_OPCODE_X0 = 3,
+  ADD_RRR_0_OPCODE_X1 = 3,
+  ADD_RRR_0_OPCODE_Y0 = 1,
+  ADD_SPECIAL_0_OPCODE_Y1 = 1,
+  ANDI_IMM8_OPCODE_X0 = 3,
+  ANDI_IMM8_OPCODE_X1 = 3,
+  ANDI_OPCODE_Y0 = 2,
+  ANDI_OPCODE_Y1 = 3,
+  AND_RRR_0_OPCODE_X0 = 4,
+  AND_RRR_0_OPCODE_X1 = 4,
+  AND_RRR_5_OPCODE_Y0 = 0,
+  AND_RRR_5_OPCODE_Y1 = 0,
+  BEQZT_BRANCH_OPCODE_X1 = 16,
+  BEQZ_BRANCH_OPCODE_X1 = 17,
+  BFEXTS_BF_OPCODE_X0 = 4,
+  BFEXTU_BF_OPCODE_X0 = 5,
+  BFINS_BF_OPCODE_X0 = 6,
+  BF_OPCODE_X0 = 3,
+  BGEZT_BRANCH_OPCODE_X1 = 18,
+  BGEZ_BRANCH_OPCODE_X1 = 19,
+  BGTZT_BRANCH_OPCODE_X1 = 20,
+  BGTZ_BRANCH_OPCODE_X1 = 21,
+  BLBCT_BRANCH_OPCODE_X1 = 22,
+  BLBC_BRANCH_OPCODE_X1 = 23,
+  BLBST_BRANCH_OPCODE_X1 = 24,
+  BLBS_BRANCH_OPCODE_X1 = 25,
+  BLEZT_BRANCH_OPCODE_X1 = 26,
+  BLEZ_BRANCH_OPCODE_X1 = 27,
+  BLTZT_BRANCH_OPCODE_X1 = 28,
+  BLTZ_BRANCH_OPCODE_X1 = 29,
+  BNEZT_BRANCH_OPCODE_X1 = 30,
+  BNEZ_BRANCH_OPCODE_X1 = 31,
+  BRANCH_OPCODE_X1 = 2,
+  CMOVEQZ_RRR_0_OPCODE_X0 = 5,
+  CMOVEQZ_RRR_4_OPCODE_Y0 = 0,
+  CMOVNEZ_RRR_0_OPCODE_X0 = 6,
+  CMOVNEZ_RRR_4_OPCODE_Y0 = 1,
+  CMPEQI_IMM8_OPCODE_X0 = 4,
+  CMPEQI_IMM8_OPCODE_X1 = 4,
+  CMPEQI_OPCODE_Y0 = 3,
+  CMPEQI_OPCODE_Y1 = 4,
+  CMPEQ_RRR_0_OPCODE_X0 = 7,
+  CMPEQ_RRR_0_OPCODE_X1 = 5,
+  CMPEQ_RRR_3_OPCODE_Y0 = 0,
+  CMPEQ_RRR_3_OPCODE_Y1 = 2,
+  CMPEXCH4_RRR_0_OPCODE_X1 = 6,
+  CMPEXCH_RRR_0_OPCODE_X1 = 7,
+  CMPLES_RRR_0_OPCODE_X0 = 8,
+  CMPLES_RRR_0_OPCODE_X1 = 8,
+  CMPLES_RRR_2_OPCODE_Y0 = 0,
+  CMPLES_RRR_2_OPCODE_Y1 = 0,
+  CMPLEU_RRR_0_OPCODE_X0 = 9,
+  CMPLEU_RRR_0_OPCODE_X1 = 9,
+  CMPLEU_RRR_2_OPCODE_Y0 = 1,
+  CMPLEU_RRR_2_OPCODE_Y1 = 1,
+  CMPLTSI_IMM8_OPCODE_X0 = 5,
+  CMPLTSI_IMM8_OPCODE_X1 = 5,
+  CMPLTSI_OPCODE_Y0 = 4,
+  CMPLTSI_OPCODE_Y1 = 5,
+  CMPLTS_RRR_0_OPCODE_X0 = 10,
+  CMPLTS_RRR_0_OPCODE_X1 = 10,
+  CMPLTS_RRR_2_OPCODE_Y0 = 2,
+  CMPLTS_RRR_2_OPCODE_Y1 = 2,
+  CMPLTUI_IMM8_OPCODE_X0 = 6,
+  CMPLTUI_IMM8_OPCODE_X1 = 6,
+  CMPLTU_RRR_0_OPCODE_X0 = 11,
+  CMPLTU_RRR_0_OPCODE_X1 = 11,
+  CMPLTU_RRR_2_OPCODE_Y0 = 3,
+  CMPLTU_RRR_2_OPCODE_Y1 = 3,
+  CMPNE_RRR_0_OPCODE_X0 = 12,
+  CMPNE_RRR_0_OPCODE_X1 = 12,
+  CMPNE_RRR_3_OPCODE_Y0 = 1,
+  CMPNE_RRR_3_OPCODE_Y1 = 3,
+  CMULAF_RRR_0_OPCODE_X0 = 13,
+  CMULA_RRR_0_OPCODE_X0 = 14,
+  CMULFR_RRR_0_OPCODE_X0 = 15,
+  CMULF_RRR_0_OPCODE_X0 = 16,
+  CMULHR_RRR_0_OPCODE_X0 = 17,
+  CMULH_RRR_0_OPCODE_X0 = 18,
+  CMUL_RRR_0_OPCODE_X0 = 19,
+  CNTLZ_UNARY_OPCODE_X0 = 1,
+  CNTLZ_UNARY_OPCODE_Y0 = 1,
+  CNTTZ_UNARY_OPCODE_X0 = 2,
+  CNTTZ_UNARY_OPCODE_Y0 = 2,
+  CRC32_32_RRR_0_OPCODE_X0 = 20,
+  CRC32_8_RRR_0_OPCODE_X0 = 21,
+  DBLALIGN2_RRR_0_OPCODE_X0 = 22,
+  DBLALIGN2_RRR_0_OPCODE_X1 = 13,
+  DBLALIGN4_RRR_0_OPCODE_X0 = 23,
+  DBLALIGN4_RRR_0_OPCODE_X1 = 14,
+  DBLALIGN6_RRR_0_OPCODE_X0 = 24,
+  DBLALIGN6_RRR_0_OPCODE_X1 = 15,
+  DBLALIGN_RRR_0_OPCODE_X0 = 25,
+  DRAIN_UNARY_OPCODE_X1 = 1,
+  DTLBPR_UNARY_OPCODE_X1 = 2,
+  EXCH4_RRR_0_OPCODE_X1 = 16,
+  EXCH_RRR_0_OPCODE_X1 = 17,
+  FDOUBLE_ADDSUB_RRR_0_OPCODE_X0 = 26,
+  FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0 = 27,
+  FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0 = 28,
+  FDOUBLE_PACK1_RRR_0_OPCODE_X0 = 29,
+  FDOUBLE_PACK2_RRR_0_OPCODE_X0 = 30,
+  FDOUBLE_SUB_FLAGS_RRR_0_OPCODE_X0 = 31,
+  FDOUBLE_UNPACK_MAX_RRR_0_OPCODE_X0 = 32,
+  FDOUBLE_UNPACK_MIN_RRR_0_OPCODE_X0 = 33,
+  FETCHADD4_RRR_0_OPCODE_X1 = 18,
+  FETCHADDGEZ4_RRR_0_OPCODE_X1 = 19,
+  FETCHADDGEZ_RRR_0_OPCODE_X1 = 20,
+  FETCHADD_RRR_0_OPCODE_X1 = 21,
+  FETCHAND4_RRR_0_OPCODE_X1 = 22,
+  FETCHAND_RRR_0_OPCODE_X1 = 23,
+  FETCHOR4_RRR_0_OPCODE_X1 = 24,
+  FETCHOR_RRR_0_OPCODE_X1 = 25,
+  FINV_UNARY_OPCODE_X1 = 3,
+  FLUSHWB_UNARY_OPCODE_X1 = 4,
+  FLUSH_UNARY_OPCODE_X1 = 5,
+  FNOP_UNARY_OPCODE_X0 = 3,
+  FNOP_UNARY_OPCODE_X1 = 6,
+  FNOP_UNARY_OPCODE_Y0 = 3,
+  FNOP_UNARY_OPCODE_Y1 = 8,
+  FSINGLE_ADD1_RRR_0_OPCODE_X0 = 34,
+  FSINGLE_ADDSUB2_RRR_0_OPCODE_X0 = 35,
+  FSINGLE_MUL1_RRR_0_OPCODE_X0 = 36,
+  FSINGLE_MUL2_RRR_0_OPCODE_X0 = 37,
+  FSINGLE_PACK1_UNARY_OPCODE_X0 = 4,
+  FSINGLE_PACK1_UNARY_OPCODE_Y0 = 4,
+  FSINGLE_PACK2_RRR_0_OPCODE_X0 = 38,
+  FSINGLE_SUB1_RRR_0_OPCODE_X0 = 39,
+  ICOH_UNARY_OPCODE_X1 = 7,
+  ILL_UNARY_OPCODE_X1 = 8,
+  ILL_UNARY_OPCODE_Y1 = 9,
+  IMM8_OPCODE_X0 = 4,
+  IMM8_OPCODE_X1 = 3,
+  INV_UNARY_OPCODE_X1 = 9,
+  IRET_UNARY_OPCODE_X1 = 10,
+  JALRP_UNARY_OPCODE_X1 = 11,
+  JALRP_UNARY_OPCODE_Y1 = 10,
+  JALR_UNARY_OPCODE_X1 = 12,
+  JALR_UNARY_OPCODE_Y1 = 11,
+  JAL_JUMP_OPCODE_X1 = 0,
+  JRP_UNARY_OPCODE_X1 = 13,
+  JRP_UNARY_OPCODE_Y1 = 12,
+  JR_UNARY_OPCODE_X1 = 14,
+  JR_UNARY_OPCODE_Y1 = 13,
+  JUMP_OPCODE_X1 = 4,
+  J_JUMP_OPCODE_X1 = 1,
+  LD1S_ADD_IMM8_OPCODE_X1 = 7,
+  LD1S_OPCODE_Y2 = 0,
+  LD1S_UNARY_OPCODE_X1 = 15,
+  LD1U_ADD_IMM8_OPCODE_X1 = 8,
+  LD1U_OPCODE_Y2 = 1,
+  LD1U_UNARY_OPCODE_X1 = 16,
+  LD2S_ADD_IMM8_OPCODE_X1 = 9,
+  LD2S_OPCODE_Y2 = 2,
+  LD2S_UNARY_OPCODE_X1 = 17,
+  LD2U_ADD_IMM8_OPCODE_X1 = 10,
+  LD2U_OPCODE_Y2 = 3,
+  LD2U_UNARY_OPCODE_X1 = 18,
+  LD4S_ADD_IMM8_OPCODE_X1 = 11,
+  LD4S_OPCODE_Y2 = 1,
+  LD4S_UNARY_OPCODE_X1 = 19,
+  LD4U_ADD_IMM8_OPCODE_X1 = 12,
+  LD4U_OPCODE_Y2 = 2,
+  LD4U_UNARY_OPCODE_X1 = 20,
+  LDNA_UNARY_OPCODE_X1 = 21,
+  LDNT1S_ADD_IMM8_OPCODE_X1 = 13,
+  LDNT1S_UNARY_OPCODE_X1 = 22,
+  LDNT1U_ADD_IMM8_OPCODE_X1 = 14,
+  LDNT1U_UNARY_OPCODE_X1 = 23,
+  LDNT2S_ADD_IMM8_OPCODE_X1 = 15,
+  LDNT2S_UNARY_OPCODE_X1 = 24,
+  LDNT2U_ADD_IMM8_OPCODE_X1 = 16,
+  LDNT2U_UNARY_OPCODE_X1 = 25,
+  LDNT4S_ADD_IMM8_OPCODE_X1 = 17,
+  LDNT4S_UNARY_OPCODE_X1 = 26,
+  LDNT4U_ADD_IMM8_OPCODE_X1 = 18,
+  LDNT4U_UNARY_OPCODE_X1 = 27,
+  LDNT_ADD_IMM8_OPCODE_X1 = 19,
+  LDNT_UNARY_OPCODE_X1 = 28,
+  LD_ADD_IMM8_OPCODE_X1 = 20,
+  LD_OPCODE_Y2 = 3,
+  LD_UNARY_OPCODE_X1 = 29,
+  LNK_UNARY_OPCODE_X1 = 30,
+  LNK_UNARY_OPCODE_Y1 = 14,
+  LWNA_ADD_IMM8_OPCODE_X1 = 21,
+  MFSPR_IMM8_OPCODE_X1 = 22,
+  MF_UNARY_OPCODE_X1 = 31,
+  MM_BF_OPCODE_X0 = 7,
+  MNZ_RRR_0_OPCODE_X0 = 40,
+  MNZ_RRR_0_OPCODE_X1 = 26,
+  MNZ_RRR_4_OPCODE_Y0 = 2,
+  MNZ_RRR_4_OPCODE_Y1 = 2,
+  MODE_OPCODE_YA2 = 1,
+  MODE_OPCODE_YB2 = 2,
+  MODE_OPCODE_YC2 = 3,
+  MTSPR_IMM8_OPCODE_X1 = 23,
+  MULAX_RRR_0_OPCODE_X0 = 41,
+  MULAX_RRR_3_OPCODE_Y0 = 2,
+  MULA_HS_HS_RRR_0_OPCODE_X0 = 42,
+  MULA_HS_HS_RRR_9_OPCODE_Y0 = 0,
+  MULA_HS_HU_RRR_0_OPCODE_X0 = 43,
+  MULA_HS_LS_RRR_0_OPCODE_X0 = 44,
+  MULA_HS_LU_RRR_0_OPCODE_X0 = 45,
+  MULA_HU_HU_RRR_0_OPCODE_X0 = 46,
+  MULA_HU_HU_RRR_9_OPCODE_Y0 = 1,
+  MULA_HU_LS_RRR_0_OPCODE_X0 = 47,
+  MULA_HU_LU_RRR_0_OPCODE_X0 = 48,
+  MULA_LS_LS_RRR_0_OPCODE_X0 = 49,
+  MULA_LS_LS_RRR_9_OPCODE_Y0 = 2,
+  MULA_LS_LU_RRR_0_OPCODE_X0 = 50,
+  MULA_LU_LU_RRR_0_OPCODE_X0 = 51,
+  MULA_LU_LU_RRR_9_OPCODE_Y0 = 3,
+  MULX_RRR_0_OPCODE_X0 = 52,
+  MULX_RRR_3_OPCODE_Y0 = 3,
+  MUL_HS_HS_RRR_0_OPCODE_X0 = 53,
+  MUL_HS_HS_RRR_8_OPCODE_Y0 = 0,
+  MUL_HS_HU_RRR_0_OPCODE_X0 = 54,
+  MUL_HS_LS_RRR_0_OPCODE_X0 = 55,
+  MUL_HS_LU_RRR_0_OPCODE_X0 = 56,
+  MUL_HU_HU_RRR_0_OPCODE_X0 = 57,
+  MUL_HU_HU_RRR_8_OPCODE_Y0 = 1,
+  MUL_HU_LS_RRR_0_OPCODE_X0 = 58,
+  MUL_HU_LU_RRR_0_OPCODE_X0 = 59,
+  MUL_LS_LS_RRR_0_OPCODE_X0 = 60,
+  MUL_LS_LS_RRR_8_OPCODE_Y0 = 2,
+  MUL_LS_LU_RRR_0_OPCODE_X0 = 61,
+  MUL_LU_LU_RRR_0_OPCODE_X0 = 62,
+  MUL_LU_LU_RRR_8_OPCODE_Y0 = 3,
+  MZ_RRR_0_OPCODE_X0 = 63,
+  MZ_RRR_0_OPCODE_X1 = 27,
+  MZ_RRR_4_OPCODE_Y0 = 3,
+  MZ_RRR_4_OPCODE_Y1 = 3,
+  NAP_UNARY_OPCODE_X1 = 32,
+  NOP_UNARY_OPCODE_X0 = 5,
+  NOP_UNARY_OPCODE_X1 = 33,
+  NOP_UNARY_OPCODE_Y0 = 5,
+  NOP_UNARY_OPCODE_Y1 = 15,
+  NOR_RRR_0_OPCODE_X0 = 64,
+  NOR_RRR_0_OPCODE_X1 = 28,
+  NOR_RRR_5_OPCODE_Y0 = 1,
+  NOR_RRR_5_OPCODE_Y1 = 1,
+  ORI_IMM8_OPCODE_X0 = 7,
+  ORI_IMM8_OPCODE_X1 = 24,
+  OR_RRR_0_OPCODE_X0 = 65,
+  OR_RRR_0_OPCODE_X1 = 29,
+  OR_RRR_5_OPCODE_Y0 = 2,
+  OR_RRR_5_OPCODE_Y1 = 2,
+  PCNT_UNARY_OPCODE_X0 = 6,
+  PCNT_UNARY_OPCODE_Y0 = 6,
+  REVBITS_UNARY_OPCODE_X0 = 7,
+  REVBITS_UNARY_OPCODE_Y0 = 7,
+  REVBYTES_UNARY_OPCODE_X0 = 8,
+  REVBYTES_UNARY_OPCODE_Y0 = 8,
+  ROTLI_SHIFT_OPCODE_X0 = 1,
+  ROTLI_SHIFT_OPCODE_X1 = 1,
+  ROTLI_SHIFT_OPCODE_Y0 = 0,
+  ROTLI_SHIFT_OPCODE_Y1 = 0,
+  ROTL_RRR_0_OPCODE_X0 = 66,
+  ROTL_RRR_0_OPCODE_X1 = 30,
+  ROTL_RRR_6_OPCODE_Y0 = 0,
+  ROTL_RRR_6_OPCODE_Y1 = 0,
+  RRR_0_OPCODE_X0 = 5,
+  RRR_0_OPCODE_X1 = 5,
+  RRR_0_OPCODE_Y0 = 5,
+  RRR_0_OPCODE_Y1 = 6,
+  RRR_1_OPCODE_Y0 = 6,
+  RRR_1_OPCODE_Y1 = 7,
+  RRR_2_OPCODE_Y0 = 7,
+  RRR_2_OPCODE_Y1 = 8,
+  RRR_3_OPCODE_Y0 = 8,
+  RRR_3_OPCODE_Y1 = 9,
+  RRR_4_OPCODE_Y0 = 9,
+  RRR_4_OPCODE_Y1 = 10,
+  RRR_5_OPCODE_Y0 = 10,
+  RRR_5_OPCODE_Y1 = 11,
+  RRR_6_OPCODE_Y0 = 11,
+  RRR_6_OPCODE_Y1 = 12,
+  RRR_7_OPCODE_Y0 = 12,
+  RRR_7_OPCODE_Y1 = 13,
+  RRR_8_OPCODE_Y0 = 13,
+  RRR_9_OPCODE_Y0 = 14,
+  SHIFT_OPCODE_X0 = 6,
+  SHIFT_OPCODE_X1 = 6,
+  SHIFT_OPCODE_Y0 = 15,
+  SHIFT_OPCODE_Y1 = 14,
+  SHL16INSLI_OPCODE_X0 = 7,
+  SHL16INSLI_OPCODE_X1 = 7,
+  SHL1ADDX_RRR_0_OPCODE_X0 = 67,
+  SHL1ADDX_RRR_0_OPCODE_X1 = 31,
+  SHL1ADDX_RRR_7_OPCODE_Y0 = 1,
+  SHL1ADDX_RRR_7_OPCODE_Y1 = 1,
+  SHL1ADD_RRR_0_OPCODE_X0 = 68,
+  SHL1ADD_RRR_0_OPCODE_X1 = 32,
+  SHL1ADD_RRR_1_OPCODE_Y0 = 0,
+  SHL1ADD_RRR_1_OPCODE_Y1 = 0,
+  SHL2ADDX_RRR_0_OPCODE_X0 = 69,
+  SHL2ADDX_RRR_0_OPCODE_X1 = 33,
+  SHL2ADDX_RRR_7_OPCODE_Y0 = 2,
+  SHL2ADDX_RRR_7_OPCODE_Y1 = 2,
+  SHL2ADD_RRR_0_OPCODE_X0 = 70,
+  SHL2ADD_RRR_0_OPCODE_X1 = 34,
+  SHL2ADD_RRR_1_OPCODE_Y0 = 1,
+  SHL2ADD_RRR_1_OPCODE_Y1 = 1,
+  SHL3ADDX_RRR_0_OPCODE_X0 = 71,
+  SHL3ADDX_RRR_0_OPCODE_X1 = 35,
+  SHL3ADDX_RRR_7_OPCODE_Y0 = 3,
+  SHL3ADDX_RRR_7_OPCODE_Y1 = 3,
+  SHL3ADD_RRR_0_OPCODE_X0 = 72,
+  SHL3ADD_RRR_0_OPCODE_X1 = 36,
+  SHL3ADD_RRR_1_OPCODE_Y0 = 2,
+  SHL3ADD_RRR_1_OPCODE_Y1 = 2,
+  SHLI_SHIFT_OPCODE_X0 = 2,
+  SHLI_SHIFT_OPCODE_X1 = 2,
+  SHLI_SHIFT_OPCODE_Y0 = 1,
+  SHLI_SHIFT_OPCODE_Y1 = 1,
+  SHLXI_SHIFT_OPCODE_X0 = 3,
+  SHLXI_SHIFT_OPCODE_X1 = 3,
+  SHLX_RRR_0_OPCODE_X0 = 73,
+  SHLX_RRR_0_OPCODE_X1 = 37,
+  SHL_RRR_0_OPCODE_X0 = 74,
+  SHL_RRR_0_OPCODE_X1 = 38,
+  SHL_RRR_6_OPCODE_Y0 = 1,
+  SHL_RRR_6_OPCODE_Y1 = 1,
+  SHRSI_SHIFT_OPCODE_X0 = 4,
+  SHRSI_SHIFT_OPCODE_X1 = 4,
+  SHRSI_SHIFT_OPCODE_Y0 = 2,
+  SHRSI_SHIFT_OPCODE_Y1 = 2,
+  SHRS_RRR_0_OPCODE_X0 = 75,
+  SHRS_RRR_0_OPCODE_X1 = 39,
+  SHRS_RRR_6_OPCODE_Y0 = 2,
+  SHRS_RRR_6_OPCODE_Y1 = 2,
+  SHRUI_SHIFT_OPCODE_X0 = 5,
+  SHRUI_SHIFT_OPCODE_X1 = 5,
+  SHRUI_SHIFT_OPCODE_Y0 = 3,
+  SHRUI_SHIFT_OPCODE_Y1 = 3,
+  SHRUXI_SHIFT_OPCODE_X0 = 6,
+  SHRUXI_SHIFT_OPCODE_X1 = 6,
+  SHRUX_RRR_0_OPCODE_X0 = 76,
+  SHRUX_RRR_0_OPCODE_X1 = 40,
+  SHRU_RRR_0_OPCODE_X0 = 77,
+  SHRU_RRR_0_OPCODE_X1 = 41,
+  SHRU_RRR_6_OPCODE_Y0 = 3,
+  SHRU_RRR_6_OPCODE_Y1 = 3,
+  SHUFFLEBYTES_RRR_0_OPCODE_X0 = 78,
+  ST1_ADD_IMM8_OPCODE_X1 = 25,
+  ST1_OPCODE_Y2 = 0,
+  ST1_RRR_0_OPCODE_X1 = 42,
+  ST2_ADD_IMM8_OPCODE_X1 = 26,
+  ST2_OPCODE_Y2 = 1,
+  ST2_RRR_0_OPCODE_X1 = 43,
+  ST4_ADD_IMM8_OPCODE_X1 = 27,
+  ST4_OPCODE_Y2 = 2,
+  ST4_RRR_0_OPCODE_X1 = 44,
+  STNT1_ADD_IMM8_OPCODE_X1 = 28,
+  STNT1_RRR_0_OPCODE_X1 = 45,
+  STNT2_ADD_IMM8_OPCODE_X1 = 29,
+  STNT2_RRR_0_OPCODE_X1 = 46,
+  STNT4_ADD_IMM8_OPCODE_X1 = 30,
+  STNT4_RRR_0_OPCODE_X1 = 47,
+  STNT_ADD_IMM8_OPCODE_X1 = 31,
+  STNT_RRR_0_OPCODE_X1 = 48,
+  ST_ADD_IMM8_OPCODE_X1 = 32,
+  ST_OPCODE_Y2 = 3,
+  ST_RRR_0_OPCODE_X1 = 49,
+  SUBXSC_RRR_0_OPCODE_X0 = 79,
+  SUBXSC_RRR_0_OPCODE_X1 = 50,
+  SUBX_RRR_0_OPCODE_X0 = 80,
+  SUBX_RRR_0_OPCODE_X1 = 51,
+  SUBX_RRR_0_OPCODE_Y0 = 2,
+  SUBX_RRR_0_OPCODE_Y1 = 2,
+  SUB_RRR_0_OPCODE_X0 = 81,
+  SUB_RRR_0_OPCODE_X1 = 52,
+  SUB_RRR_0_OPCODE_Y0 = 3,
+  SUB_RRR_0_OPCODE_Y1 = 3,
+  SWINT0_UNARY_OPCODE_X1 = 34,
+  SWINT1_UNARY_OPCODE_X1 = 35,
+  SWINT2_UNARY_OPCODE_X1 = 36,
+  SWINT3_UNARY_OPCODE_X1 = 37,
+  TBLIDXB0_UNARY_OPCODE_X0 = 9,
+  TBLIDXB0_UNARY_OPCODE_Y0 = 9,
+  TBLIDXB1_UNARY_OPCODE_X0 = 10,
+  TBLIDXB1_UNARY_OPCODE_Y0 = 10,
+  TBLIDXB2_UNARY_OPCODE_X0 = 11,
+  TBLIDXB2_UNARY_OPCODE_Y0 = 11,
+  TBLIDXB3_UNARY_OPCODE_X0 = 12,
+  TBLIDXB3_UNARY_OPCODE_Y0 = 12,
+  UNARY_RRR_0_OPCODE_X0 = 82,
+  UNARY_RRR_0_OPCODE_X1 = 53,
+  UNARY_RRR_1_OPCODE_Y0 = 3,
+  UNARY_RRR_1_OPCODE_Y1 = 3,
+  V1ADDI_IMM8_OPCODE_X0 = 8,
+  V1ADDI_IMM8_OPCODE_X1 = 33,
+  V1ADDUC_RRR_0_OPCODE_X0 = 83,
+  V1ADDUC_RRR_0_OPCODE_X1 = 54,
+  V1ADD_RRR_0_OPCODE_X0 = 84,
+  V1ADD_RRR_0_OPCODE_X1 = 55,
+  V1ADIFFU_RRR_0_OPCODE_X0 = 85,
+  V1AVGU_RRR_0_OPCODE_X0 = 86,
+  V1CMPEQI_IMM8_OPCODE_X0 = 9,
+  V1CMPEQI_IMM8_OPCODE_X1 = 34,
+  V1CMPEQ_RRR_0_OPCODE_X0 = 87,
+  V1CMPEQ_RRR_0_OPCODE_X1 = 56,
+  V1CMPLES_RRR_0_OPCODE_X0 = 88,
+  V1CMPLES_RRR_0_OPCODE_X1 = 57,
+  V1CMPLEU_RRR_0_OPCODE_X0 = 89,
+  V1CMPLEU_RRR_0_OPCODE_X1 = 58,
+  V1CMPLTSI_IMM8_OPCODE_X0 = 10,
+  V1CMPLTSI_IMM8_OPCODE_X1 = 35,
+  V1CMPLTS_RRR_0_OPCODE_X0 = 90,
+  V1CMPLTS_RRR_0_OPCODE_X1 = 59,
+  V1CMPLTUI_IMM8_OPCODE_X0 = 11,
+  V1CMPLTUI_IMM8_OPCODE_X1 = 36,
+  V1CMPLTU_RRR_0_OPCODE_X0 = 91,
+  V1CMPLTU_RRR_0_OPCODE_X1 = 60,
+  V1CMPNE_RRR_0_OPCODE_X0 = 92,
+  V1CMPNE_RRR_0_OPCODE_X1 = 61,
+  V1DDOTPUA_RRR_0_OPCODE_X0 = 161,
+  V1DDOTPUSA_RRR_0_OPCODE_X0 = 93,
+  V1DDOTPUS_RRR_0_OPCODE_X0 = 94,
+  V1DDOTPU_RRR_0_OPCODE_X0 = 162,
+  V1DOTPA_RRR_0_OPCODE_X0 = 95,
+  V1DOTPUA_RRR_0_OPCODE_X0 = 163,
+  V1DOTPUSA_RRR_0_OPCODE_X0 = 96,
+  V1DOTPUS_RRR_0_OPCODE_X0 = 97,
+  V1DOTPU_RRR_0_OPCODE_X0 = 164,
+  V1DOTP_RRR_0_OPCODE_X0 = 98,
+  V1INT_H_RRR_0_OPCODE_X0 = 99,
+  V1INT_H_RRR_0_OPCODE_X1 = 62,
+  V1INT_L_RRR_0_OPCODE_X0 = 100,
+  V1INT_L_RRR_0_OPCODE_X1 = 63,
+  V1MAXUI_IMM8_OPCODE_X0 = 12,
+  V1MAXUI_IMM8_OPCODE_X1 = 37,
+  V1MAXU_RRR_0_OPCODE_X0 = 101,
+  V1MAXU_RRR_0_OPCODE_X1 = 64,
+  V1MINUI_IMM8_OPCODE_X0 = 13,
+  V1MINUI_IMM8_OPCODE_X1 = 38,
+  V1MINU_RRR_0_OPCODE_X0 = 102,
+  V1MINU_RRR_0_OPCODE_X1 = 65,
+  V1MNZ_RRR_0_OPCODE_X0 = 103,
+  V1MNZ_RRR_0_OPCODE_X1 = 66,
+  V1MULTU_RRR_0_OPCODE_X0 = 104,
+  V1MULUS_RRR_0_OPCODE_X0 = 105,
+  V1MULU_RRR_0_OPCODE_X0 = 106,
+  V1MZ_RRR_0_OPCODE_X0 = 107,
+  V1MZ_RRR_0_OPCODE_X1 = 67,
+  V1SADAU_RRR_0_OPCODE_X0 = 108,
+  V1SADU_RRR_0_OPCODE_X0 = 109,
+  V1SHLI_SHIFT_OPCODE_X0 = 7,
+  V1SHLI_SHIFT_OPCODE_X1 = 7,
+  V1SHL_RRR_0_OPCODE_X0 = 110,
+  V1SHL_RRR_0_OPCODE_X1 = 68,
+  V1SHRSI_SHIFT_OPCODE_X0 = 8,
+  V1SHRSI_SHIFT_OPCODE_X1 = 8,
+  V1SHRS_RRR_0_OPCODE_X0 = 111,
+  V1SHRS_RRR_0_OPCODE_X1 = 69,
+  V1SHRUI_SHIFT_OPCODE_X0 = 9,
+  V1SHRUI_SHIFT_OPCODE_X1 = 9,
+  V1SHRU_RRR_0_OPCODE_X0 = 112,
+  V1SHRU_RRR_0_OPCODE_X1 = 70,
+  V1SUBUC_RRR_0_OPCODE_X0 = 113,
+  V1SUBUC_RRR_0_OPCODE_X1 = 71,
+  V1SUB_RRR_0_OPCODE_X0 = 114,
+  V1SUB_RRR_0_OPCODE_X1 = 72,
+  V2ADDI_IMM8_OPCODE_X0 = 14,
+  V2ADDI_IMM8_OPCODE_X1 = 39,
+  V2ADDSC_RRR_0_OPCODE_X0 = 115,
+  V2ADDSC_RRR_0_OPCODE_X1 = 73,
+  V2ADD_RRR_0_OPCODE_X0 = 116,
+  V2ADD_RRR_0_OPCODE_X1 = 74,
+  V2ADIFFS_RRR_0_OPCODE_X0 = 117,
+  V2AVGS_RRR_0_OPCODE_X0 = 118,
+  V2CMPEQI_IMM8_OPCODE_X0 = 15,
+  V2CMPEQI_IMM8_OPCODE_X1 = 40,
+  V2CMPEQ_RRR_0_OPCODE_X0 = 119,
+  V2CMPEQ_RRR_0_OPCODE_X1 = 75,
+  V2CMPLES_RRR_0_OPCODE_X0 = 120,
+  V2CMPLES_RRR_0_OPCODE_X1 = 76,
+  V2CMPLEU_RRR_0_OPCODE_X0 = 121,
+  V2CMPLEU_RRR_0_OPCODE_X1 = 77,
+  V2CMPLTSI_IMM8_OPCODE_X0 = 16,
+  V2CMPLTSI_IMM8_OPCODE_X1 = 41,
+  V2CMPLTS_RRR_0_OPCODE_X0 = 122,
+  V2CMPLTS_RRR_0_OPCODE_X1 = 78,
+  V2CMPLTUI_IMM8_OPCODE_X0 = 17,
+  V2CMPLTUI_IMM8_OPCODE_X1 = 42,
+  V2CMPLTU_RRR_0_OPCODE_X0 = 123,
+  V2CMPLTU_RRR_0_OPCODE_X1 = 79,
+  V2CMPNE_RRR_0_OPCODE_X0 = 124,
+  V2CMPNE_RRR_0_OPCODE_X1 = 80,
+  V2DOTPA_RRR_0_OPCODE_X0 = 125,
+  V2DOTP_RRR_0_OPCODE_X0 = 126,
+  V2INT_H_RRR_0_OPCODE_X0 = 127,
+  V2INT_H_RRR_0_OPCODE_X1 = 81,
+  V2INT_L_RRR_0_OPCODE_X0 = 128,
+  V2INT_L_RRR_0_OPCODE_X1 = 82,
+  V2MAXSI_IMM8_OPCODE_X0 = 18,
+  V2MAXSI_IMM8_OPCODE_X1 = 43,
+  V2MAXS_RRR_0_OPCODE_X0 = 129,
+  V2MAXS_RRR_0_OPCODE_X1 = 83,
+  V2MINSI_IMM8_OPCODE_X0 = 19,
+  V2MINSI_IMM8_OPCODE_X1 = 44,
+  V2MINS_RRR_0_OPCODE_X0 = 130,
+  V2MINS_RRR_0_OPCODE_X1 = 84,
+  V2MNZ_RRR_0_OPCODE_X0 = 131,
+  V2MNZ_RRR_0_OPCODE_X1 = 85,
+  V2MULFSC_RRR_0_OPCODE_X0 = 132,
+  V2MULS_RRR_0_OPCODE_X0 = 133,
+  V2MULTS_RRR_0_OPCODE_X0 = 134,
+  V2MZ_RRR_0_OPCODE_X0 = 135,
+  V2MZ_RRR_0_OPCODE_X1 = 86,
+  V2PACKH_RRR_0_OPCODE_X0 = 136,
+  V2PACKH_RRR_0_OPCODE_X1 = 87,
+  V2PACKL_RRR_0_OPCODE_X0 = 137,
+  V2PACKL_RRR_0_OPCODE_X1 = 88,
+  V2PACKUC_RRR_0_OPCODE_X0 = 138,
+  V2PACKUC_RRR_0_OPCODE_X1 = 89,
+  V2SADAS_RRR_0_OPCODE_X0 = 139,
+  V2SADAU_RRR_0_OPCODE_X0 = 140,
+  V2SADS_RRR_0_OPCODE_X0 = 141,
+  V2SADU_RRR_0_OPCODE_X0 = 142,
+  V2SHLI_SHIFT_OPCODE_X0 = 10,
+  V2SHLI_SHIFT_OPCODE_X1 = 10,
+  V2SHLSC_RRR_0_OPCODE_X0 = 143,
+  V2SHLSC_RRR_0_OPCODE_X1 = 90,
+  V2SHL_RRR_0_OPCODE_X0 = 144,
+  V2SHL_RRR_0_OPCODE_X1 = 91,
+  V2SHRSI_SHIFT_OPCODE_X0 = 11,
+  V2SHRSI_SHIFT_OPCODE_X1 = 11,
+  V2SHRS_RRR_0_OPCODE_X0 = 145,
+  V2SHRS_RRR_0_OPCODE_X1 = 92,
+  V2SHRUI_SHIFT_OPCODE_X0 = 12,
+  V2SHRUI_SHIFT_OPCODE_X1 = 12,
+  V2SHRU_RRR_0_OPCODE_X0 = 146,
+  V2SHRU_RRR_0_OPCODE_X1 = 93,
+  V2SUBSC_RRR_0_OPCODE_X0 = 147,
+  V2SUBSC_RRR_0_OPCODE_X1 = 94,
+  V2SUB_RRR_0_OPCODE_X0 = 148,
+  V2SUB_RRR_0_OPCODE_X1 = 95,
+  V4ADDSC_RRR_0_OPCODE_X0 = 149,
+  V4ADDSC_RRR_0_OPCODE_X1 = 96,
+  V4ADD_RRR_0_OPCODE_X0 = 150,
+  V4ADD_RRR_0_OPCODE_X1 = 97,
+  V4INT_H_RRR_0_OPCODE_X0 = 151,
+  V4INT_H_RRR_0_OPCODE_X1 = 98,
+  V4INT_L_RRR_0_OPCODE_X0 = 152,
+  V4INT_L_RRR_0_OPCODE_X1 = 99,
+  V4PACKSC_RRR_0_OPCODE_X0 = 153,
+  V4PACKSC_RRR_0_OPCODE_X1 = 100,
+  V4SHLSC_RRR_0_OPCODE_X0 = 154,
+  V4SHLSC_RRR_0_OPCODE_X1 = 101,
+  V4SHL_RRR_0_OPCODE_X0 = 155,
+  V4SHL_RRR_0_OPCODE_X1 = 102,
+  V4SHRS_RRR_0_OPCODE_X0 = 156,
+  V4SHRS_RRR_0_OPCODE_X1 = 103,
+  V4SHRU_RRR_0_OPCODE_X0 = 157,
+  V4SHRU_RRR_0_OPCODE_X1 = 104,
+  V4SUBSC_RRR_0_OPCODE_X0 = 158,
+  V4SUBSC_RRR_0_OPCODE_X1 = 105,
+  V4SUB_RRR_0_OPCODE_X0 = 159,
+  V4SUB_RRR_0_OPCODE_X1 = 106,
+  WH64_UNARY_OPCODE_X1 = 38,
+  XORI_IMM8_OPCODE_X0 = 20,
+  XORI_IMM8_OPCODE_X1 = 45,
+  XOR_RRR_0_OPCODE_X0 = 160,
+  XOR_RRR_0_OPCODE_X1 = 107,
+  XOR_RRR_5_OPCODE_Y0 = 3,
+  XOR_RRR_5_OPCODE_Y1 = 3
+};
+
+/* Extract the BFEnd_X0 field: the 6 bits at positions 12..17 of the bundle. */
+static __inline unsigned int
+get_BFEnd_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0x3fu;
+}
+
+/* Extract the BFOpcodeExtension_X0 field: the 4 bits at positions 24..27. */
+static __inline unsigned int
+get_BFOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 24) & 0xfu;
+}
+
+/* Extract the BFStart_X0 field: the 6 bits at positions 18..23 of the bundle. */
+static __inline unsigned int
+get_BFStart_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 18) & 0x3fu;
+}
+
+/* Reassemble the split BrOff_X1 field: result bits 0..5 come from bundle
+   bits 31..36 and result bits 6..16 come from bundle bits 43..53. */
+static __inline unsigned int
+get_BrOff_X1(tilegx_bundle_bits n)
+{
+  const unsigned int low_part = (unsigned int)(n >> 31) & 0x0000003fu;
+  const unsigned int high_part = (unsigned int)(n >> 37) & 0x0001ffc0u;
+  return low_part | high_part;
+}
+
+/* Extract the BrType_X1 field: the 5 bits at positions 54..58 of the bundle. */
+static __inline unsigned int
+get_BrType_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 54);
+  return shifted & 0x1fu;
+}
+
+/* Reassemble the split Dest_Imm8_X1 field: result bits 0..5 come from bundle
+   bits 31..36 and result bits 6..7 come from bundle bits 49..50. */
+static __inline unsigned int
+get_Dest_Imm8_X1(tilegx_bundle_bits n)
+{
+  const unsigned int low_part = (unsigned int)(n >> 31) & 0x0000003fu;
+  const unsigned int high_part = (unsigned int)(n >> 43) & 0x000000c0u;
+  return low_part | high_part;
+}
+
+/* Extract the Dest_X0 field: the lowest 6 bits (positions 0..5) of the bundle. */
+static __inline unsigned int
+get_Dest_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return low_word & 0x3fu;
+}
+
+/* Extract the Dest_X1 field: the 6 bits at positions 31..36 of the bundle. */
+static __inline unsigned int
+get_Dest_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+  return shifted & 0x3fu;
+}
+
+/* Extract the Dest_Y0 field: the lowest 6 bits (positions 0..5) of the bundle. */
+static __inline unsigned int
+get_Dest_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return low_word & 0x3fu;
+}
+
+/* Extract the Dest_Y1 field: the 6 bits at positions 31..36 of the bundle. */
+static __inline unsigned int
+get_Dest_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+  return shifted & 0x3fu;
+}
+
+/* Extract the Imm16_X0 field: the 16 bits at positions 12..27 of the bundle. */
+static __inline unsigned int
+get_Imm16_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0xffffu;
+}
+
+/* Extract the Imm16_X1 field: the 16 bits at positions 43..58 of the bundle. */
+static __inline unsigned int
+get_Imm16_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0xffffu;
+}
+
+/* Extract the Imm8OpcodeExtension_X0 field: the 8 bits at positions 20..27. */
+static __inline unsigned int
+get_Imm8OpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 20) & 0xffu;
+}
+
+/* Extract the Imm8OpcodeExtension_X1 field: the 8 bits at positions 51..58. */
+static __inline unsigned int
+get_Imm8OpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 51);
+  return shifted & 0xffu;
+}
+
+/* Extract the Imm8_X0 field: the 8 bits at positions 12..19 of the bundle. */
+static __inline unsigned int
+get_Imm8_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0xffu;
+}
+
+/* Extract the Imm8_X1 field: the 8 bits at positions 43..50 of the bundle. */
+static __inline unsigned int
+get_Imm8_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0xffu;
+}
+
+/* Extract the Imm8_Y0 field: the 8 bits at positions 12..19 of the bundle. */
+static __inline unsigned int
+get_Imm8_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0xffu;
+}
+
+/* Extract the Imm8_Y1 field: the 8 bits at positions 43..50 of the bundle. */
+static __inline unsigned int
+get_Imm8_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0xffu;
+}
+
+/* Extract the JumpOff_X1 field: the 27 bits at positions 31..57 of the bundle. */
+static __inline unsigned int
+get_JumpOff_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 31);
+  return shifted & 0x7ffffffu;
+}
+
+/* Extract the JumpOpcodeExtension_X1 field: the single bit at position 58. */
+static __inline unsigned int
+get_JumpOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 58);
+  return shifted & 0x1u;
+}
+
+/* Extract the MF_Imm14_X1 field: the 14 bits at positions 37..50 of the bundle. */
+static __inline unsigned int
+get_MF_Imm14_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+  return shifted & 0x3fffu;
+}
+
+/* Reassemble the split MT_Imm14_X1 field: result bits 0..5 come from bundle
+   bits 31..36 and result bits 6..13 come from bundle bits 43..50. */
+static __inline unsigned int
+get_MT_Imm14_X1(tilegx_bundle_bits n)
+{
+  const unsigned int low_part = (unsigned int)(n >> 31) & 0x0000003fu;
+  const unsigned int high_part = (unsigned int)(n >> 37) & 0x00003fc0u;
+  return low_part | high_part;
+}
+
+/* Extract the Mode field: the 2 bits at positions 62..63 of the bundle. */
+static __inline unsigned int
+get_Mode(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 62);
+  return shifted & 0x3u;
+}
+
+/* Extract the Opcode_X0 field: the 3 bits at positions 28..30 of the bundle. */
+static __inline unsigned int
+get_Opcode_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 28) & 0x7u;
+}
+
+/* Extract the Opcode_X1 field: the 3 bits at positions 59..61 of the bundle. */
+static __inline unsigned int
+get_Opcode_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 59);
+  return shifted & 0x7u;
+}
+
+/* Extract the Opcode_Y0 field: the 4 bits at positions 27..30 of the bundle. */
+static __inline unsigned int
+get_Opcode_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 27) & 0xfu;
+}
+
+/* Extract the Opcode_Y1 field: the 4 bits at positions 58..61 of the bundle. */
+static __inline unsigned int
+get_Opcode_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 58);
+  return shifted & 0xfu;
+}
+
+/* Reassemble the split Opcode_Y2 field: result bit 0 comes from bundle
+   bit 26 and result bit 1 comes from bundle bit 57. */
+static __inline unsigned int
+get_Opcode_Y2(tilegx_bundle_bits n)
+{
+  const unsigned int low_bit = (unsigned int)((n >> 26) & 0x00000001);
+  const unsigned int high_bit = (unsigned int)(n >> 56) & 0x00000002u;
+  return low_bit | high_bit;
+}
+
+/* Extract the RRROpcodeExtension_X0 field: the 10 bits at positions 18..27. */
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 18) & 0x3ffu;
+}
+
+/* Extract the RRROpcodeExtension_X1 field: the 10 bits at positions 49..58. */
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3ffu;
+}
+
+/* Extract the RRROpcodeExtension_Y0 field: the 2 bits at positions 18..19. */
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 18) & 0x3u;
+}
+
+/* Extract the RRROpcodeExtension_Y1 field: the 2 bits at positions 49..50. */
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3u;
+}
+
+/* Extract the ShAmt_X0 field: the 6 bits at positions 12..17 of the bundle. */
+static __inline unsigned int
+get_ShAmt_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0x3fu;
+}
+
+/* Extract the ShAmt_X1 field: the 6 bits at positions 43..48 of the bundle. */
+static __inline unsigned int
+get_ShAmt_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3fu;
+}
+
+/* Extract the ShAmt_Y0 field: the 6 bits at positions 12..17 of the bundle. */
+static __inline unsigned int
+get_ShAmt_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0x3fu;
+}
+
+/* Extract the ShAmt_Y1 field: the 6 bits at positions 43..48 of the bundle. */
+static __inline unsigned int
+get_ShAmt_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3fu;
+}
+
+/* Extract the ShiftOpcodeExtension_X0 field: the 10 bits at positions 18..27. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 18) & 0x3ffu;
+}
+
+/* Extract the ShiftOpcodeExtension_X1 field: the 10 bits at positions 49..58. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3ffu;
+}
+
+/* Extract the ShiftOpcodeExtension_Y0 field: the 2 bits at positions 18..19. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 18) & 0x3u;
+}
+
+/* Extract the ShiftOpcodeExtension_Y1 field: the 2 bits at positions 49..50. */
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 49);
+  return shifted & 0x3u;
+}
+
+/* Extract the SrcA_X0 field: the 6 bits at positions 6..11 of the bundle. */
+static __inline unsigned int
+get_SrcA_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 6) & 0x3fu;
+}
+
+/* Extract the SrcA_X1 field: the 6 bits at positions 37..42 of the bundle. */
+static __inline unsigned int
+get_SrcA_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+  return shifted & 0x3fu;
+}
+
+/* Extract the SrcA_Y0 field: the 6 bits at positions 6..11 of the bundle. */
+static __inline unsigned int
+get_SrcA_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 6) & 0x3fu;
+}
+
+/* Extract the SrcA_Y1 field: the 6 bits at positions 37..42 of the bundle. */
+static __inline unsigned int
+get_SrcA_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 37);
+  return shifted & 0x3fu;
+}
+
+/* Extract the SrcA_Y2 field: the 6 bits at positions 20..25 of the bundle. */
+static __inline unsigned int
+get_SrcA_Y2(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 20) & 0x3fu;
+}
+
+/* Extract the SrcBDest_Y2 field: the 6 bits at positions 51..56 of the bundle. */
+static __inline unsigned int
+get_SrcBDest_Y2(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 51);
+  return shifted & 0x3fu;
+}
+
+/* Extract the SrcB_X0 field: the 6 bits at positions 12..17 of the bundle. */
+static __inline unsigned int
+get_SrcB_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0x3fu;
+}
+
+/* Extract the SrcB_X1 field: the 6 bits at positions 43..48 of the bundle. */
+static __inline unsigned int
+get_SrcB_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3fu;
+}
+
+/* Extract the SrcB_Y0 field: the 6 bits at positions 12..17 of the bundle. */
+static __inline unsigned int
+get_SrcB_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0x3fu;
+}
+
+/* Extract the SrcB_Y1 field: the 6 bits at positions 43..48 of the bundle. */
+static __inline unsigned int
+get_SrcB_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3fu;
+}
+
+/* Extract the UnaryOpcodeExtension_X0 field: the 6 bits at positions 12..17. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0x3fu;
+}
+
+/* Extract the UnaryOpcodeExtension_X1 field: the 6 bits at positions 43..48. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3fu;
+}
+
+/* Extract the UnaryOpcodeExtension_Y0 field: the 6 bits at positions 12..17. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int low_word = (unsigned int)num;
+  return (low_word >> 12) & 0x3fu;
+}
+
+/* Extract the UnaryOpcodeExtension_Y1 field: the 6 bits at positions 43..48. */
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  const unsigned int shifted = (unsigned int)(n >> 43);
+  return shifted & 0x3fu;
+}
+
+/* Sign-extend the low NUM_BITS bits of N to a full int.
+ *
+ * Bit (NUM_BITS - 1) of N is treated as the sign bit; every bit above it
+ * is ignored.  The result lies in [-2^(NUM_BITS-1), 2^(NUM_BITS-1) - 1].
+ *
+ * If NUM_BITS is not in 1 .. (width of int - 1), N is returned unchanged:
+ * at full width there is nothing to extend, and any other count has no
+ * meaningful result.
+ *
+ * The work is done with unsigned masking instead of "(n << s) >> s",
+ * because left-shifting a negative or overflowing signed int is undefined
+ * and right-shifting a negative int is implementation-defined.
+ */
+static __inline int
+sign_extend(int n, int num_bits)
+{
+  const int width = (int)(sizeof(int) * 8);
+  unsigned int sign_bit;
+  unsigned int value;
+
+  if (num_bits <= 0 || num_bits >= width)
+    return n;
+
+  sign_bit = 1u << (num_bits - 1);
+  /* Keep only the low NUM_BITS bits of N. */
+  value = (unsigned int)n & ((sign_bit << 1) - 1u);
+  /* Flipping the sign bit and subtracting it back maps the upper half of
+     the field's range onto negative values; both operands fit in an int
+     because NUM_BITS < width. */
+  return (int)(value ^ sign_bit) - (int)sign_bit;
+}
+
+/* Encode the BFEnd_X0 field: the low 6 bits of NUM placed at bundle bits
+   12..17; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_BFEnd_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 12;
+}
+
+/* Encode the BFOpcodeExtension_X0 field: the low 4 bits of NUM placed at
+   bundle bits 24..27; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_BFOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xfu;
+  return field << 24;
+}
+
+/* Encode the BFStart_X0 field: the low 6 bits of NUM placed at bundle bits
+   18..23; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_BFStart_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 18;
+}
+
+/* Encode the split BrOff_X1 field: bits 0..5 of NUM go to bundle bits
+   31..36 and bits 6..16 of NUM go to bundle bits 43..53. */
+static __inline tilegx_bundle_bits
+create_BrOff_X1(int num)
+{
+  const unsigned int raw = (unsigned int)num;
+  const tilegx_bundle_bits low_part = raw & 0x0000003fu;
+  const tilegx_bundle_bits high_part = raw & 0x0001ffc0u;
+  return (low_part << 31) | (high_part << 37);
+}
+
+/* Encode the BrType_X1 field: the low 5 bits of NUM placed at bundle bits
+   54..58; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_BrType_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x1fu;
+  return field << 54;
+}
+
+/* Encode the split Dest_Imm8_X1 field: bits 0..5 of NUM go to bundle bits
+   31..36 and bits 6..7 of NUM go to bundle bits 49..50. */
+static __inline tilegx_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+  const unsigned int raw = (unsigned int)num;
+  const tilegx_bundle_bits low_part = raw & 0x0000003fu;
+  const tilegx_bundle_bits high_part = raw & 0x000000c0u;
+  return (low_part << 31) | (high_part << 43);
+}
+
+/* Encode the Dest_X0 field: the low 6 bits of NUM placed at bundle bits
+   0..5; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Dest_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field;
+}
+
+/* Encode the Dest_X1 field: the low 6 bits of NUM placed at bundle bits
+   31..36; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Dest_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 31;
+}
+
+/* Encode the Dest_Y0 field: the low 6 bits of NUM placed at bundle bits
+   0..5; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Dest_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field;
+}
+
+/* Encode the Dest_Y1 field: the low 6 bits of NUM placed at bundle bits
+   31..36; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Dest_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 31;
+}
+
+/* Encode the Imm16_X0 field: the low 16 bits of NUM placed at bundle bits
+   12..27; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm16_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xffffu;
+  return field << 12;
+}
+
+/* Encode the Imm16_X1 field: the low 16 bits of NUM placed at bundle bits
+   43..58; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm16_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xffffu;
+  return field << 43;
+}
+
+/* Encode the Imm8OpcodeExtension_X0 field: the low 8 bits of NUM placed at
+   bundle bits 20..27; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xffu;
+  return field << 20;
+}
+
+/* Encode the Imm8OpcodeExtension_X1 field: the low 8 bits of NUM placed at
+   bundle bits 51..58; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xffu;
+  return field << 51;
+}
+
+/* Encode the Imm8_X0 field: the low 8 bits of NUM placed at bundle bits
+   12..19; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm8_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xffu;
+  return field << 12;
+}
+
+/* Encode the Imm8_X1 field: the low 8 bits of NUM placed at bundle bits
+   43..50; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm8_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xffu;
+  return field << 43;
+}
+
+/* Encode the Imm8_Y0 field: the low 8 bits of NUM placed at bundle bits
+   12..19; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm8_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xffu;
+  return field << 12;
+}
+
+/* Encode the Imm8_Y1 field: the low 8 bits of NUM placed at bundle bits
+   43..50; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Imm8_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xffu;
+  return field << 43;
+}
+
+/* Encode the JumpOff_X1 field: the low 27 bits of NUM placed at bundle bits
+   31..57; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_JumpOff_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x7ffffffu;
+  return field << 31;
+}
+
+/* Encode the JumpOpcodeExtension_X1 field: bit 0 of NUM placed at bundle
+   bit 58; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_JumpOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x1u;
+  return field << 58;
+}
+
+/* Encode the MF_Imm14_X1 field: the low 14 bits of NUM placed at bundle bits
+   37..50; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_MF_Imm14_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fffu;
+  return field << 37;
+}
+
+/* Encode the split MT_Imm14_X1 field: bits 0..5 of NUM go to bundle bits
+   31..36 and bits 6..13 of NUM go to bundle bits 43..50. */
+static __inline tilegx_bundle_bits
+create_MT_Imm14_X1(int num)
+{
+  const unsigned int raw = (unsigned int)num;
+  const tilegx_bundle_bits low_part = raw & 0x0000003fu;
+  const tilegx_bundle_bits high_part = raw & 0x00003fc0u;
+  return (low_part << 31) | (high_part << 37);
+}
+
+/* Encode the Mode field: the low 2 bits of NUM placed at bundle bits 62..63;
+   higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Mode(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3u;
+  return field << 62;
+}
+
+/* Encode the Opcode_X0 field: the low 3 bits of NUM placed at bundle bits
+   28..30; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Opcode_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x7u;
+  return field << 28;
+}
+
+/* Encode the Opcode_X1 field: the low 3 bits of NUM placed at bundle bits
+   59..61; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Opcode_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x7u;
+  return field << 59;
+}
+
+/* Encode the Opcode_Y0 field: the low 4 bits of NUM placed at bundle bits
+   27..30; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Opcode_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0xfu;
+  return field << 27;
+}
+
+/* Encode the Opcode_Y1 field: the low 4 bits of NUM placed at bundle bits
+   58..61; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0xfu;
+  return field << 58;
+}
+
+/* Encode the split Opcode_Y2 field: bit 0 of NUM goes to bundle bit 26 and
+   bit 1 of NUM goes to bundle bit 57. */
+static __inline tilegx_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const unsigned int raw = (unsigned int)num;
+  const unsigned int low_bit = (raw & 0x00000001u) << 26;
+  const tilegx_bundle_bits high_bit = raw & 0x00000002u;
+  return low_bit | (high_bit << 56);
+}
+
+/* Encode the RRROpcodeExtension_X0 field: the low 10 bits of NUM placed at
+   bundle bits 18..27; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ffu;
+  return field << 18;
+}
+
+/* Encode the RRROpcodeExtension_X1 field: the low 10 bits of NUM placed at
+   bundle bits 49..58; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3ffu;
+  return field << 49;
+}
+
+/* Encode the RRROpcodeExtension_Y0 field: the low 2 bits of NUM placed at
+   bundle bits 18..19; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3u;
+  return field << 18;
+}
+
+/* Encode the RRROpcodeExtension_Y1 field: the low 2 bits of NUM placed at
+   bundle bits 49..50; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3u;
+  return field << 49;
+}
+
+/* Encode the ShAmt_X0 field: the low 6 bits of NUM placed at bundle bits
+   12..17; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShAmt_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 12;
+}
+
+/* Encode the ShAmt_X1 field: the low 6 bits of NUM placed at bundle bits
+   43..48; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 43;
+}
+
+/* Encode the ShAmt_Y0 field: the low 6 bits of NUM placed at bundle bits
+   12..17; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 12;
+}
+
+/* Encode the ShAmt_Y1 field: the low 6 bits of NUM placed at bundle bits
+   43..48; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 43;
+}
+
+/* Encode the ShiftOpcodeExtension_X0 field: the low 10 bits of NUM placed at
+   bundle bits 18..27; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3ffu;
+  return field << 18;
+}
+
+/* Encode the ShiftOpcodeExtension_X1 field: the low 10 bits of NUM placed at
+   bundle bits 49..58; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3ffu;
+  return field << 49;
+}
+
+/* Encode the ShiftOpcodeExtension_Y0 field: the low 2 bits of NUM placed at
+   bundle bits 18..19; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3u;
+  return field << 18;
+}
+
+/* Encode the ShiftOpcodeExtension_Y1 field: the low 2 bits of NUM placed at
+   bundle bits 49..50; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3u;
+  return field << 49;
+}
+
+/* Encode the SrcA_X0 field: the low 6 bits of NUM placed at bundle bits
+   6..11; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcA_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 6;
+}
+
+/* Encode the SrcA_X1 field: the low 6 bits of NUM placed at bundle bits
+   37..42; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcA_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 37;
+}
+
+/* Encode the SrcA_Y0 field: the low 6 bits of NUM placed at bundle bits
+   6..11; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcA_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 6;
+}
+
+/* Encode the SrcA_Y1 field: the low 6 bits of NUM placed at bundle bits
+   37..42; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 37;
+}
+
+/* Encode the SrcA_Y2 field: the low 6 bits of NUM placed at bundle bits
+   20..25; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcA_Y2(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 20;
+}
+
+/* Encode the SrcBDest_Y2 field: the low 6 bits of NUM placed at bundle bits
+   51..56; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 51;
+}
+
+/* Encode the SrcB_X0 field: the low 6 bits of NUM placed at bundle bits
+   12..17; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcB_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 12;
+}
+
+/* Encode the SrcB_X1 field: the low 6 bits of NUM placed at bundle bits
+   43..48; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcB_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 43;
+}
+
+/* Encode the SrcB_Y0 field: the low 6 bits of NUM placed at bundle bits
+   12..17; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcB_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 12;
+}
+
+/* Encode the SrcB_Y1 field: the low 6 bits of NUM placed at bundle bits
+   43..48; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 43;
+}
+
+/* Encode the UnaryOpcodeExtension_X0 field: the low 6 bits of NUM placed at
+   bundle bits 12..17; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 12;
+}
+
+/* Encode the UnaryOpcodeExtension_X1 field: the low 6 bits of NUM placed at
+   bundle bits 43..48; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 43;
+}
+
+/* Encode the UnaryOpcodeExtension_Y0 field: the low 6 bits of NUM placed at
+   bundle bits 12..17; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y0(int num)
+{
+  const unsigned int field = (unsigned int)num & 0x3fu;
+  return field << 12;
+}
+
+/* Encode the UnaryOpcodeExtension_Y1 field: the low 6 bits of NUM placed at
+   bundle bits 43..48; higher bits of NUM are dropped. */
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y1(int num)
+{
+  const tilegx_bundle_bits field = (unsigned int)num & 0x3fu;
+  return field << 43;
+}
+
+const struct tilegx_opcode tilegx_opcodes[336] =
+{
+ { "bpt", TILEGX_OPC_BPT, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffffffff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a44ae00000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "info", TILEGX_OPC_INFO, 0xf, 1, TREG_ZERO, 1,
+    { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00fffULL,
+      0xfff807ff80000000ULL,
+      0x0000000078000fffULL,
+      0x3c0007ff80000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040300fffULL,
+      0x181807ff80000000ULL,
+      0x0000000010000fffULL,
+      0x0c0007ff80000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "infol", TILEGX_OPC_INFOL, 0x3, 1, TREG_ZERO, 1,
+    { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000fffULL,
+      0xf80007ff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000070000fffULL,
+      0x380007ff80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld4s_tls", TILEGX_OPC_LD4S_TLS, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1858000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld_tls", TILEGX_OPC_LD_TLS, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18a0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "move", TILEGX_OPC_MOVE, 0xf, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 6, 7 }, { 10, 11 }, { 12, 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0x00000000780ff000ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      0x000000005107f000ULL,
+      0x283bf80000000000ULL,
+      0x00000000500bf000ULL,
+      0x2c05f80000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "movei", TILEGX_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1,
+    { { 8, 0 }, { 6, 1 }, { 10, 2 }, { 12, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00fc0ULL,
+      0xfff807e000000000ULL,
+      0x0000000078000fc0ULL,
+      0x3c0007e000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040100fc0ULL,
+      0x180807e000000000ULL,
+      0x0000000000000fc0ULL,
+      0x040007e000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "moveli", TILEGX_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1,
+    { { 8, 4 }, { 6, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000fc0ULL,
+      0xf80007e000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000010000fc0ULL,
+      0x000007e000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch", TILEGX_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a801f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x41f8000004000000ULL
+    }
+#endif
+  },
+  { "prefetch_add_l1", TILEGX_OPC_PREFETCH_ADD_L1, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1840001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l1_fault", TILEGX_OPC_PREFETCH_ADD_L1_FAULT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1838001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l2", TILEGX_OPC_PREFETCH_ADD_L2, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1850001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l2_fault", TILEGX_OPC_PREFETCH_ADD_L2_FAULT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1848001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l3", TILEGX_OPC_PREFETCH_ADD_L3, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1860001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l3_fault", TILEGX_OPC_PREFETCH_ADD_L3_FAULT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1858001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_l1", TILEGX_OPC_PREFETCH_L1, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a801f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x41f8000004000000ULL
+    }
+#endif
+  },
+  { "prefetch_l1_fault", TILEGX_OPC_PREFETCH_L1_FAULT, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a781f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x41f8000000000000ULL
+    }
+#endif
+  },
+  { "prefetch_l2", TILEGX_OPC_PREFETCH_L2, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a901f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x43f8000004000000ULL
+    }
+#endif
+  },
+  { "prefetch_l2_fault", TILEGX_OPC_PREFETCH_L2_FAULT, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a881f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x43f8000000000000ULL
+    }
+#endif
+  },
+  { "prefetch_l3", TILEGX_OPC_PREFETCH_L3, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286aa01f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x83f8000000000000ULL
+    }
+#endif
+  },
+  { "prefetch_l3_fault", TILEGX_OPC_PREFETCH_L3_FAULT, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a981f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x81f8000004000000ULL
+    }
+#endif
+  },
+  { "raise", TILEGX_OPC_RAISE, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffffffff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a44ae80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "add", TILEGX_OPC_ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000500c0000ULL,
+      0x2806000000000000ULL,
+      0x0000000028040000ULL,
+      0x1802000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addi", TILEGX_OPC_ADDI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040100000ULL,
+      0x1808000000000000ULL,
+      0ULL,
+      0x0400000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addli", TILEGX_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000010000000ULL,
+      0ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addx", TILEGX_OPC_ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050080000ULL,
+      0x2804000000000000ULL,
+      0x0000000028000000ULL,
+      0x1800000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addxi", TILEGX_OPC_ADDXI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040200000ULL,
+      0x1810000000000000ULL,
+      0x0000000008000000ULL,
+      0x0800000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addxli", TILEGX_OPC_ADDXLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000020000000ULL,
+      0x0800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addxsc", TILEGX_OPC_ADDXSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050040000ULL,
+      0x2802000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "and", TILEGX_OPC_AND, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050100000ULL,
+      0x2808000000000000ULL,
+      0x0000000050000000ULL,
+      0x2c00000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "andi", TILEGX_OPC_ANDI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040300000ULL,
+      0x1818000000000000ULL,
+      0x0000000010000000ULL,
+      0x0c00000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "beqz", TILEGX_OPC_BEQZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1440000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "beqzt", TILEGX_OPC_BEQZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1400000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bfexts", TILEGX_OPC_BFEXTS, 0x1, 4, TREG_ZERO, 1,
+    { { 8, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000034000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bfextu", TILEGX_OPC_BFEXTU, 0x1, 4, TREG_ZERO, 1,
+    { { 8, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000035000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bfins", TILEGX_OPC_BFINS, 0x1, 4, TREG_ZERO, 1,
+    { { 23, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000036000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgez", TILEGX_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x14c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgezt", TILEGX_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1480000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgtz", TILEGX_OPC_BGTZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1540000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgtzt", TILEGX_OPC_BGTZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1500000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbc", TILEGX_OPC_BLBC, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x15c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbct", TILEGX_OPC_BLBCT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1580000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbs", TILEGX_OPC_BLBS, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1640000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbst", TILEGX_OPC_BLBST, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1600000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blez", TILEGX_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x16c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blezt", TILEGX_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1680000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bltz", TILEGX_OPC_BLTZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1740000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bltzt", TILEGX_OPC_BLTZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1700000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bnez", TILEGX_OPC_BNEZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x17c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bnezt", TILEGX_OPC_BNEZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1780000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "clz", TILEGX_OPC_CLZ, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051481000ULL,
+      -1ULL,
+      0x00000000300c1000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmoveqz", TILEGX_OPC_CMOVEQZ, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050140000ULL,
+      -1ULL,
+      0x0000000048000000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmovnez", TILEGX_OPC_CMOVNEZ, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050180000ULL,
+      -1ULL,
+      0x0000000048040000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpeq", TILEGX_OPC_CMPEQ, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000501c0000ULL,
+      0x280a000000000000ULL,
+      0x0000000040000000ULL,
+      0x2404000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpeqi", TILEGX_OPC_CMPEQI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040400000ULL,
+      0x1820000000000000ULL,
+      0x0000000018000000ULL,
+      0x1000000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpexch", TILEGX_OPC_CMPEXCH, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x280e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpexch4", TILEGX_OPC_CMPEXCH4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x280c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmples", TILEGX_OPC_CMPLES, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050200000ULL,
+      0x2810000000000000ULL,
+      0x0000000038000000ULL,
+      0x2000000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpleu", TILEGX_OPC_CMPLEU, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050240000ULL,
+      0x2812000000000000ULL,
+      0x0000000038040000ULL,
+      0x2002000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmplts", TILEGX_OPC_CMPLTS, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050280000ULL,
+      0x2814000000000000ULL,
+      0x0000000038080000ULL,
+      0x2004000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpltsi", TILEGX_OPC_CMPLTSI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040500000ULL,
+      0x1828000000000000ULL,
+      0x0000000020000000ULL,
+      0x1400000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpltu", TILEGX_OPC_CMPLTU, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000502c0000ULL,
+      0x2816000000000000ULL,
+      0x00000000380c0000ULL,
+      0x2006000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpltui", TILEGX_OPC_CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040600000ULL,
+      0x1830000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpne", TILEGX_OPC_CMPNE, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050300000ULL,
+      0x2818000000000000ULL,
+      0x0000000040040000ULL,
+      0x2406000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmul", TILEGX_OPC_CMUL, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000504c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmula", TILEGX_OPC_CMULA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050380000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulaf", TILEGX_OPC_CMULAF, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050340000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulf", TILEGX_OPC_CMULF, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050400000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulfr", TILEGX_OPC_CMULFR, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000503c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulh", TILEGX_OPC_CMULH, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050480000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulhr", TILEGX_OPC_CMULHR, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050440000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "crc32_32", TILEGX_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050500000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "crc32_8", TILEGX_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050540000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ctz", TILEGX_OPC_CTZ, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051482000ULL,
+      -1ULL,
+      0x00000000300c2000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign", TILEGX_OPC_DBLALIGN, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050640000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign2", TILEGX_OPC_DBLALIGN2, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050580000ULL,
+      0x281a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign4", TILEGX_OPC_DBLALIGN4, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000505c0000ULL,
+      0x281c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign6", TILEGX_OPC_DBLALIGN6, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050600000ULL,
+      0x281e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "drain", TILEGX_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a080000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dtlbpr", TILEGX_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a100000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "exch", TILEGX_OPC_EXCH, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2822000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "exch4", TILEGX_OPC_EXCH4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2820000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_add_flags", TILEGX_OPC_FDOUBLE_ADD_FLAGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000506c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_addsub", TILEGX_OPC_FDOUBLE_ADDSUB, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050680000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_mul_flags", TILEGX_OPC_FDOUBLE_MUL_FLAGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050700000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_pack1", TILEGX_OPC_FDOUBLE_PACK1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050740000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_pack2", TILEGX_OPC_FDOUBLE_PACK2, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050780000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_sub_flags", TILEGX_OPC_FDOUBLE_SUB_FLAGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000507c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_unpack_max", TILEGX_OPC_FDOUBLE_UNPACK_MAX, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050800000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_unpack_min", TILEGX_OPC_FDOUBLE_UNPACK_MIN, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050840000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchadd", TILEGX_OPC_FETCHADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x282a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchadd4", TILEGX_OPC_FETCHADD4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2824000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchaddgez", TILEGX_OPC_FETCHADDGEZ, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2828000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchaddgez4", TILEGX_OPC_FETCHADDGEZ4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2826000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchand", TILEGX_OPC_FETCHAND, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x282e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchand4", TILEGX_OPC_FETCHAND4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x282c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchor", TILEGX_OPC_FETCHOR, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2832000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchor4", TILEGX_OPC_FETCHOR4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2830000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "finv", TILEGX_OPC_FINV, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a180000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "flush", TILEGX_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a280000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "flushwb", TILEGX_OPC_FLUSHWB, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a200000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fnop", TILEGX_OPC_FNOP, 0xf, 0, TREG_ZERO, 1,
+    { {  }, {  }, {  }, {  }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0x00000000780ff000ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051483000ULL,
+      0x286a300000000000ULL,
+      0x00000000300c3000ULL,
+      0x1c06400000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_add1", TILEGX_OPC_FSINGLE_ADD1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_addsub2", TILEGX_OPC_FSINGLE_ADDSUB2, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000508c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_mul1", TILEGX_OPC_FSINGLE_MUL1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050900000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_mul2", TILEGX_OPC_FSINGLE_MUL2, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050940000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_pack1", TILEGX_OPC_FSINGLE_PACK1, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051484000ULL,
+      -1ULL,
+      0x00000000300c4000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_pack2", TILEGX_OPC_FSINGLE_PACK2, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050980000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_sub1", TILEGX_OPC_FSINGLE_SUB1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000509c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "icoh", TILEGX_OPC_ICOH, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a380000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ill", TILEGX_OPC_ILL, 0xa, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, {  }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a400000000000ULL,
+      -1ULL,
+      0x1c06480000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "inv", TILEGX_OPC_INV, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a480000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "iret", TILEGX_OPC_IRET, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a500000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "j", TILEGX_OPC_J, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfc00000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2400000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jal", TILEGX_OPC_JAL, 0x2, 1, TREG_LR, 1,
+    { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfc00000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jalr", TILEGX_OPC_JALR, 0xa, 1, TREG_LR, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a600000000000ULL,
+      -1ULL,
+      0x1c06580000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jalrp", TILEGX_OPC_JALRP, 0xa, 1, TREG_LR, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a580000000000ULL,
+      -1ULL,
+      0x1c06500000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jr", TILEGX_OPC_JR, 0xa, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a700000000000ULL,
+      -1ULL,
+      0x1c06680000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jrp", TILEGX_OPC_JRP, 0xa, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a680000000000ULL,
+      -1ULL,
+      0x1c06600000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld", TILEGX_OPC_LD, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286ae80000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8200000004000000ULL
+    }
+#endif
+  },
+  { "ld1s", TILEGX_OPC_LD1S, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a780000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4000000000000000ULL
+    }
+#endif
+  },
+  { "ld1s_add", TILEGX_OPC_LD1S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1838000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld1u", TILEGX_OPC_LD1U, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a800000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4000000004000000ULL
+    }
+#endif
+  },
+  { "ld1u_add", TILEGX_OPC_LD1U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1840000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld2s", TILEGX_OPC_LD2S, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a880000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4200000000000000ULL
+    }
+#endif
+  },
+  { "ld2s_add", TILEGX_OPC_LD2S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1848000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld2u", TILEGX_OPC_LD2U, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a900000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4200000004000000ULL
+    }
+#endif
+  },
+  { "ld2u_add", TILEGX_OPC_LD2U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1850000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld4s", TILEGX_OPC_LD4S, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a980000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8000000004000000ULL
+    }
+#endif
+  },
+  { "ld4s_add", TILEGX_OPC_LD4S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1858000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld4u", TILEGX_OPC_LD4U, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286aa00000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8200000000000000ULL
+    }
+#endif
+  },
+  { "ld4u_add", TILEGX_OPC_LD4U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1860000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld_add", TILEGX_OPC_LD_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18a0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldna", TILEGX_OPC_LDNA, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286aa80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldna_add", TILEGX_OPC_LDNA_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18a8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt", TILEGX_OPC_LDNT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ae00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1s", TILEGX_OPC_LDNT1S, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ab00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1s_add", TILEGX_OPC_LDNT1S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1868000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1u", TILEGX_OPC_LDNT1U, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ab80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1u_add", TILEGX_OPC_LDNT1U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1870000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2s", TILEGX_OPC_LDNT2S, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ac00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2s_add", TILEGX_OPC_LDNT2S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1878000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2u", TILEGX_OPC_LDNT2U, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ac80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2u_add", TILEGX_OPC_LDNT2U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1880000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4s", TILEGX_OPC_LDNT4S, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ad00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4s_add", TILEGX_OPC_LDNT4S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1888000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4u", TILEGX_OPC_LDNT4U, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ad80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4u_add", TILEGX_OPC_LDNT4U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1890000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt_add", TILEGX_OPC_LDNT_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1898000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "lnk", TILEGX_OPC_LNK, 0xa, 1, TREG_ZERO, 1,
+    { { 0, }, { 6 }, { 0, }, { 12 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286af00000000000ULL,
+      -1ULL,
+      0x1c06700000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mf", TILEGX_OPC_MF, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286af80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mfspr", TILEGX_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 27 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18b0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mm", TILEGX_OPC_MM, 0x1, 4, TREG_ZERO, 1,
+    { { 23, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000037000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mnz", TILEGX_OPC_MNZ, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050a00000ULL,
+      0x2834000000000000ULL,
+      0x0000000048080000ULL,
+      0x2804000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mtspr", TILEGX_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 28, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18b8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_hs", TILEGX_OPC_MUL_HS_HS, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050d40000ULL,
+      -1ULL,
+      0x0000000068000000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_hu", TILEGX_OPC_MUL_HS_HU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050d80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_ls", TILEGX_OPC_MUL_HS_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050dc0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_lu", TILEGX_OPC_MUL_HS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050e00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hu_hu", TILEGX_OPC_MUL_HU_HU, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050e40000ULL,
+      -1ULL,
+      0x0000000068040000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hu_ls", TILEGX_OPC_MUL_HU_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050e80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hu_lu", TILEGX_OPC_MUL_HU_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050ec0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_ls_ls", TILEGX_OPC_MUL_LS_LS, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050f00000ULL,
+      -1ULL,
+      0x0000000068080000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_ls_lu", TILEGX_OPC_MUL_LS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050f40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_lu_lu", TILEGX_OPC_MUL_LU_LU, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050f80000ULL,
+      -1ULL,
+      0x00000000680c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_hs", TILEGX_OPC_MULA_HS_HS, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050a80000ULL,
+      -1ULL,
+      0x0000000070000000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_hu", TILEGX_OPC_MULA_HS_HU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050ac0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_ls", TILEGX_OPC_MULA_HS_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050b00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_lu", TILEGX_OPC_MULA_HS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050b40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hu_hu", TILEGX_OPC_MULA_HU_HU, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050b80000ULL,
+      -1ULL,
+      0x0000000070040000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hu_ls", TILEGX_OPC_MULA_HU_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050bc0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hu_lu", TILEGX_OPC_MULA_HU_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050c00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_ls_ls", TILEGX_OPC_MULA_LS_LS, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050c40000ULL,
+      -1ULL,
+      0x0000000070080000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_ls_lu", TILEGX_OPC_MULA_LS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050c80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_lu_lu", TILEGX_OPC_MULA_LU_LU, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050cc0000ULL,
+      -1ULL,
+      0x00000000700c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mulax", TILEGX_OPC_MULAX, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050a40000ULL,
+      -1ULL,
+      0x0000000040080000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mulx", TILEGX_OPC_MULX, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050d00000ULL,
+      -1ULL,
+      0x00000000400c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mz", TILEGX_OPC_MZ, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050fc0000ULL,
+      0x2836000000000000ULL,
+      0x00000000480c0000ULL,
+      0x2806000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "nap", TILEGX_OPC_NAP, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "nop", TILEGX_OPC_NOP, 0xf, 0, TREG_ZERO, 1,
+    { {  }, {  }, {  }, {  }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0x00000000780ff000ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051485000ULL,
+      0x286b080000000000ULL,
+      0x00000000300c5000ULL,
+      0x1c06780000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "nor", TILEGX_OPC_NOR, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051000000ULL,
+      0x2838000000000000ULL,
+      0x0000000050040000ULL,
+      0x2c02000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "or", TILEGX_OPC_OR, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051040000ULL,
+      0x283a000000000000ULL,
+      0x0000000050080000ULL,
+      0x2c04000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ori", TILEGX_OPC_ORI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040700000ULL,
+      0x18c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "pcnt", TILEGX_OPC_PCNT, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051486000ULL,
+      -1ULL,
+      0x00000000300c6000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "revbits", TILEGX_OPC_REVBITS, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051487000ULL,
+      -1ULL,
+      0x00000000300c7000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "revbytes", TILEGX_OPC_REVBYTES, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051488000ULL,
+      -1ULL,
+      0x00000000300c8000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "rotl", TILEGX_OPC_ROTL, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051080000ULL,
+      0x283c000000000000ULL,
+      0x0000000058000000ULL,
+      0x3000000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "rotli", TILEGX_OPC_ROTLI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060040000ULL,
+      0x3002000000000000ULL,
+      0x0000000078000000ULL,
+      0x3800000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl", TILEGX_OPC_SHL, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051280000ULL,
+      0x284c000000000000ULL,
+      0x0000000058040000ULL,
+      0x3002000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl16insli", TILEGX_OPC_SHL16INSLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000070000000ULL,
+      0x3800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl1add", TILEGX_OPC_SHL1ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051100000ULL,
+      0x2840000000000000ULL,
+      0x0000000030000000ULL,
+      0x1c00000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl1addx", TILEGX_OPC_SHL1ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000510c0000ULL,
+      0x283e000000000000ULL,
+      0x0000000060040000ULL,
+      0x3402000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl2add", TILEGX_OPC_SHL2ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051180000ULL,
+      0x2844000000000000ULL,
+      0x0000000030040000ULL,
+      0x1c02000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl2addx", TILEGX_OPC_SHL2ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051140000ULL,
+      0x2842000000000000ULL,
+      0x0000000060080000ULL,
+      0x3404000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl3add", TILEGX_OPC_SHL3ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051200000ULL,
+      0x2848000000000000ULL,
+      0x0000000030080000ULL,
+      0x1c04000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl3addx", TILEGX_OPC_SHL3ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000511c0000ULL,
+      0x2846000000000000ULL,
+      0x00000000600c0000ULL,
+      0x3406000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shli", TILEGX_OPC_SHLI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060080000ULL,
+      0x3004000000000000ULL,
+      0x0000000078040000ULL,
+      0x3802000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shlx", TILEGX_OPC_SHLX, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051240000ULL,
+      0x284a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shlxi", TILEGX_OPC_SHLXI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000600c0000ULL,
+      0x3006000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrs", TILEGX_OPC_SHRS, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000512c0000ULL,
+      0x284e000000000000ULL,
+      0x0000000058080000ULL,
+      0x3004000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrsi", TILEGX_OPC_SHRSI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060100000ULL,
+      0x3008000000000000ULL,
+      0x0000000078080000ULL,
+      0x3804000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shru", TILEGX_OPC_SHRU, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051340000ULL,
+      0x2852000000000000ULL,
+      0x00000000580c0000ULL,
+      0x3006000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrui", TILEGX_OPC_SHRUI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060140000ULL,
+      0x300a000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3806000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrux", TILEGX_OPC_SHRUX, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051300000ULL,
+      0x2850000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shruxi", TILEGX_OPC_SHRUXI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060180000ULL,
+      0x300c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shufflebytes", TILEGX_OPC_SHUFFLEBYTES, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051380000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st", TILEGX_OPC_ST, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2862000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc200000004000000ULL
+    }
+#endif
+  },
+  { "st1", TILEGX_OPC_ST1, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2854000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc000000000000000ULL
+    }
+#endif
+  },
+  { "st1_add", TILEGX_OPC_ST1_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18c8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st2", TILEGX_OPC_ST2, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2856000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc000000004000000ULL
+    }
+#endif
+  },
+  { "st2_add", TILEGX_OPC_ST2_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18d0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st4", TILEGX_OPC_ST4, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2858000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc200000000000000ULL
+    }
+#endif
+  },
+  { "st4_add", TILEGX_OPC_ST4_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18d8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st_add", TILEGX_OPC_ST_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1900000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt", TILEGX_OPC_STNT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2860000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt1", TILEGX_OPC_STNT1, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x285a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt1_add", TILEGX_OPC_STNT1_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18e0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt2", TILEGX_OPC_STNT2, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x285c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt2_add", TILEGX_OPC_STNT2_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18e8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt4", TILEGX_OPC_STNT4, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x285e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt4_add", TILEGX_OPC_STNT4_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18f0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt_add", TILEGX_OPC_STNT_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18f8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "sub", TILEGX_OPC_SUB, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051440000ULL,
+      0x2868000000000000ULL,
+      0x00000000280c0000ULL,
+      0x1806000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "subx", TILEGX_OPC_SUBX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051400000ULL,
+      0x2866000000000000ULL,
+      0x0000000028080000ULL,
+      0x1804000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "subxsc", TILEGX_OPC_SUBXSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000513c0000ULL,
+      0x2864000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint0", TILEGX_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b100000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint1", TILEGX_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b180000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint2", TILEGX_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b200000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint3", TILEGX_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b280000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb0", TILEGX_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051489000ULL,
+      -1ULL,
+      0x00000000300c9000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb1", TILEGX_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x000000005148a000ULL,
+      -1ULL,
+      0x00000000300ca000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb2", TILEGX_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x000000005148b000ULL,
+      -1ULL,
+      0x00000000300cb000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb3", TILEGX_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x000000005148c000ULL,
+      -1ULL,
+      0x00000000300cc000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1add", TILEGX_OPC_V1ADD, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051500000ULL,
+      0x286e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1addi", TILEGX_OPC_V1ADDI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040800000ULL,
+      0x1908000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1adduc", TILEGX_OPC_V1ADDUC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000514c0000ULL,
+      0x286c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1adiffu", TILEGX_OPC_V1ADIFFU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051540000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1avgu", TILEGX_OPC_V1AVGU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051580000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpeq", TILEGX_OPC_V1CMPEQ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000515c0000ULL,
+      0x2870000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpeqi", TILEGX_OPC_V1CMPEQI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040900000ULL,
+      0x1910000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmples", TILEGX_OPC_V1CMPLES, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051600000ULL,
+      0x2872000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpleu", TILEGX_OPC_V1CMPLEU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051640000ULL,
+      0x2874000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmplts", TILEGX_OPC_V1CMPLTS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051680000ULL,
+      0x2876000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpltsi", TILEGX_OPC_V1CMPLTSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040a00000ULL,
+      0x1918000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpltu", TILEGX_OPC_V1CMPLTU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000516c0000ULL,
+      0x2878000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpltui", TILEGX_OPC_V1CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040b00000ULL,
+      0x1920000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpne", TILEGX_OPC_V1CMPNE, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051700000ULL,
+      0x287a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpu", TILEGX_OPC_V1DDOTPU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpua", TILEGX_OPC_V1DDOTPUA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052840000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpus", TILEGX_OPC_V1DDOTPUS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051780000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpusa", TILEGX_OPC_V1DDOTPUSA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051740000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotp", TILEGX_OPC_V1DOTP, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpa", TILEGX_OPC_V1DOTPA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000517c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpu", TILEGX_OPC_V1DOTPU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052900000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpua", TILEGX_OPC_V1DOTPUA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000528c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpus", TILEGX_OPC_V1DOTPUS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051840000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpusa", TILEGX_OPC_V1DOTPUSA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051800000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1int_h", TILEGX_OPC_V1INT_H, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000518c0000ULL,
+      0x287c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1int_l", TILEGX_OPC_V1INT_L, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051900000ULL,
+      0x287e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1maxu", TILEGX_OPC_V1MAXU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051940000ULL,
+      0x2880000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1maxui", TILEGX_OPC_V1MAXUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040c00000ULL,
+      0x1928000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1minu", TILEGX_OPC_V1MINU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051980000ULL,
+      0x2882000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1minui", TILEGX_OPC_V1MINUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040d00000ULL,
+      0x1930000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mnz", TILEGX_OPC_V1MNZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000519c0000ULL,
+      0x2884000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1multu", TILEGX_OPC_V1MULTU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051a00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mulu", TILEGX_OPC_V1MULU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051a80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mulus", TILEGX_OPC_V1MULUS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051a40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mz", TILEGX_OPC_V1MZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051ac0000ULL,
+      0x2886000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1sadau", TILEGX_OPC_V1SADAU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051b00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1sadu", TILEGX_OPC_V1SADU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051b40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shl", TILEGX_OPC_V1SHL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051b80000ULL,
+      0x2888000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shli", TILEGX_OPC_V1SHLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000601c0000ULL,
+      0x300e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shrs", TILEGX_OPC_V1SHRS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051bc0000ULL,
+      0x288a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shrsi", TILEGX_OPC_V1SHRSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060200000ULL,
+      0x3010000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shru", TILEGX_OPC_V1SHRU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051c00000ULL,
+      0x288c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shrui", TILEGX_OPC_V1SHRUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060240000ULL,
+      0x3012000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1sub", TILEGX_OPC_V1SUB, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051c80000ULL,
+      0x2890000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1subuc", TILEGX_OPC_V1SUBUC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051c40000ULL,
+      0x288e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2add", TILEGX_OPC_V2ADD, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051d00000ULL,
+      0x2894000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2addi", TILEGX_OPC_V2ADDI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040e00000ULL,
+      0x1938000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2addsc", TILEGX_OPC_V2ADDSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051cc0000ULL,
+      0x2892000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2adiffs", TILEGX_OPC_V2ADIFFS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051d40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2avgs", TILEGX_OPC_V2AVGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051d80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpeq", TILEGX_OPC_V2CMPEQ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051dc0000ULL,
+      0x2896000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpeqi", TILEGX_OPC_V2CMPEQI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040f00000ULL,
+      0x1940000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmples", TILEGX_OPC_V2CMPLES, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051e00000ULL,
+      0x2898000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpleu", TILEGX_OPC_V2CMPLEU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051e40000ULL,
+      0x289a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmplts", TILEGX_OPC_V2CMPLTS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051e80000ULL,
+      0x289c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpltsi", TILEGX_OPC_V2CMPLTSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041000000ULL,
+      0x1948000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpltu", TILEGX_OPC_V2CMPLTU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051ec0000ULL,
+      0x289e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpltui", TILEGX_OPC_V2CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041100000ULL,
+      0x1950000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpne", TILEGX_OPC_V2CMPNE, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051f00000ULL,
+      0x28a0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2dotp", TILEGX_OPC_V2DOTP, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051f80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2dotpa", TILEGX_OPC_V2DOTPA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051f40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2int_h", TILEGX_OPC_V2INT_H, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051fc0000ULL,
+      0x28a2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2int_l", TILEGX_OPC_V2INT_L, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052000000ULL,
+      0x28a4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2maxs", TILEGX_OPC_V2MAXS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052040000ULL,
+      0x28a6000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2maxsi", TILEGX_OPC_V2MAXSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041200000ULL,
+      0x1958000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mins", TILEGX_OPC_V2MINS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052080000ULL,
+      0x28a8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2minsi", TILEGX_OPC_V2MINSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041300000ULL,
+      0x1960000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mnz", TILEGX_OPC_V2MNZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000520c0000ULL,
+      0x28aa000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mulfsc", TILEGX_OPC_V2MULFSC, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052100000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2muls", TILEGX_OPC_V2MULS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052140000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mults", TILEGX_OPC_V2MULTS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052180000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mz", TILEGX_OPC_V2MZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000521c0000ULL,
+      0x28ac000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2packh", TILEGX_OPC_V2PACKH, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052200000ULL,
+      0x28ae000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2packl", TILEGX_OPC_V2PACKL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052240000ULL,
+      0x28b0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2packuc", TILEGX_OPC_V2PACKUC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052280000ULL,
+      0x28b2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sadas", TILEGX_OPC_V2SADAS, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000522c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sadau", TILEGX_OPC_V2SADAU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052300000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sads", TILEGX_OPC_V2SADS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052340000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sadu", TILEGX_OPC_V2SADU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052380000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shl", TILEGX_OPC_V2SHL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052400000ULL,
+      0x28b6000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shli", TILEGX_OPC_V2SHLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060280000ULL,
+      0x3014000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shlsc", TILEGX_OPC_V2SHLSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000523c0000ULL,
+      0x28b4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shrs", TILEGX_OPC_V2SHRS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052440000ULL,
+      0x28b8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shrsi", TILEGX_OPC_V2SHRSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000602c0000ULL,
+      0x3016000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shru", TILEGX_OPC_V2SHRU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052480000ULL,
+      0x28ba000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shrui", TILEGX_OPC_V2SHRUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060300000ULL,
+      0x3018000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sub", TILEGX_OPC_V2SUB, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052500000ULL,
+      0x28be000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2subsc", TILEGX_OPC_V2SUBSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000524c0000ULL,
+      0x28bc000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4add", TILEGX_OPC_V4ADD, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052580000ULL,
+      0x28c2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4addsc", TILEGX_OPC_V4ADDSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052540000ULL,
+      0x28c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4int_h", TILEGX_OPC_V4INT_H, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000525c0000ULL,
+      0x28c4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4int_l", TILEGX_OPC_V4INT_L, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052600000ULL,
+      0x28c6000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4packsc", TILEGX_OPC_V4PACKSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052640000ULL,
+      0x28c8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shl", TILEGX_OPC_V4SHL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000526c0000ULL,
+      0x28cc000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shlsc", TILEGX_OPC_V4SHLSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052680000ULL,
+      0x28ca000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shrs", TILEGX_OPC_V4SHRS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052700000ULL,
+      0x28ce000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shru", TILEGX_OPC_V4SHRU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052740000ULL,
+      0x28d0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4sub", TILEGX_OPC_V4SUB, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000527c0000ULL,
+      0x28d4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4subsc", TILEGX_OPC_V4SUBSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052780000ULL,
+      0x28d2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "wh64", TILEGX_OPC_WH64, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b300000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "xor", TILEGX_OPC_XOR, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000052800000ULL,
+      0x28d6000000000000ULL,
+      0x00000000500c0000ULL,
+      0x2c06000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "xori", TILEGX_OPC_XORI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041400000ULL,
+      0x1968000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { NULL, TILEGX_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } },
+#ifndef DISASM_ONLY
+    { 0, }, { 0, }
+#endif
+  }
+};
+
+#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) /* Header word of one decode-table node.
+ * `start` is stored in the bits below bit 6 (so it must be less than 64) and
+ * a mask of `size` one-bits is stored from bit 6 upward.  In decode_X0_fsm a
+ * header built with a given `size` is followed by 2^size entries, e.g.
+ * BITFIELD(6, 2) is followed by four.  The result must fit the table's
+ * unsigned short element type; the largest `size` used in this table is 9.
+ * NOTE(review): presumably the decoder shifts the instruction word right by
+ * `start` and ANDs it with the mask to select an entry -- confirm against
+ * the lookup code.
+ */
+#define CHILD(array_index) (TILEGX_OPC_NONE + (array_index)) /* Table entry that points at another node.
+ * Encodes `array_index` as an offset from TILEGX_OPC_NONE instead of naming
+ * an opcode; CHILD(n) refers to the header annotated "index n" in the same
+ * array (e.g. CHILD(513) and the BITFIELD(6, 2) header at index 513).
+ * NOTE(review): presumably TILEGX_OPC_NONE is the largest opcode value, so
+ * any entry above it can be recognised as a child index -- confirm in the
+ * opcode enum.
+ */
+
+static const unsigned short decode_X0_fsm[936] =
+{
+  BITFIELD(22, 9) /* index 0 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BFEXTS,
+  TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTU,
+  TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFINS,
+  TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_MM,
+  TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(528), CHILD(578),
+  CHILD(583), CHILD(588), CHILD(593), CHILD(598), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, CHILD(603), CHILD(620), CHILD(637), CHILD(654), CHILD(671),
+  CHILD(703), CHILD(797), CHILD(814), CHILD(831), CHILD(848), CHILD(865),
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, CHILD(889), TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  BITFIELD(6, 2) /* index 513 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518),
+  BITFIELD(8, 2) /* index 518 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523),
+  BITFIELD(10, 2) /* index 523 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI,
+  BITFIELD(20, 2) /* index 528 */,
+  TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548),
+  BITFIELD(6, 2) /* index 533 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538),
+  BITFIELD(8, 2) /* index 538 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543),
+  BITFIELD(10, 2) /* index 543 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(0, 2) /* index 548 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553),
+  BITFIELD(2, 2) /* index 553 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558),
+  BITFIELD(4, 2) /* index 558 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563),
+  BITFIELD(6, 2) /* index 563 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568),
+  BITFIELD(8, 2) /* index 568 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573),
+  BITFIELD(10, 2) /* index 573 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(20, 2) /* index 578 */,
+  TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, TILEGX_OPC_ORI,
+  BITFIELD(20, 2) /* index 583 */,
+  TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, TILEGX_OPC_V1CMPLTSI,
+  TILEGX_OPC_V1CMPLTUI,
+  BITFIELD(20, 2) /* index 588 */,
+  TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, TILEGX_OPC_V2ADDI,
+  TILEGX_OPC_V2CMPEQI,
+  BITFIELD(20, 2) /* index 593 */,
+  TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, TILEGX_OPC_V2MAXSI,
+  TILEGX_OPC_V2MINSI,
+  BITFIELD(20, 2) /* index 598 */,
+  TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(18, 4) /* index 603 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD,
+  TILEGX_OPC_AND, TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_CMPEQ,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  TILEGX_OPC_CMPNE, TILEGX_OPC_CMULAF, TILEGX_OPC_CMULA, TILEGX_OPC_CMULFR,
+  BITFIELD(18, 4) /* index 620 */,
+  TILEGX_OPC_CMULF, TILEGX_OPC_CMULHR, TILEGX_OPC_CMULH, TILEGX_OPC_CMUL,
+  TILEGX_OPC_CRC32_32, TILEGX_OPC_CRC32_8, TILEGX_OPC_DBLALIGN2,
+  TILEGX_OPC_DBLALIGN4, TILEGX_OPC_DBLALIGN6, TILEGX_OPC_DBLALIGN,
+  TILEGX_OPC_FDOUBLE_ADDSUB, TILEGX_OPC_FDOUBLE_ADD_FLAGS,
+  TILEGX_OPC_FDOUBLE_MUL_FLAGS, TILEGX_OPC_FDOUBLE_PACK1,
+  TILEGX_OPC_FDOUBLE_PACK2, TILEGX_OPC_FDOUBLE_SUB_FLAGS,
+  BITFIELD(18, 4) /* index 637 */,
+  TILEGX_OPC_FDOUBLE_UNPACK_MAX, TILEGX_OPC_FDOUBLE_UNPACK_MIN,
+  TILEGX_OPC_FSINGLE_ADD1, TILEGX_OPC_FSINGLE_ADDSUB2,
+  TILEGX_OPC_FSINGLE_MUL1, TILEGX_OPC_FSINGLE_MUL2, TILEGX_OPC_FSINGLE_PACK2,
+  TILEGX_OPC_FSINGLE_SUB1, TILEGX_OPC_MNZ, TILEGX_OPC_MULAX,
+  TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HS_HU, TILEGX_OPC_MULA_HS_LS,
+  TILEGX_OPC_MULA_HS_LU, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_HU_LS,
+  BITFIELD(18, 4) /* index 654 */,
+  TILEGX_OPC_MULA_HU_LU, TILEGX_OPC_MULA_LS_LS, TILEGX_OPC_MULA_LS_LU,
+  TILEGX_OPC_MULA_LU_LU, TILEGX_OPC_MULX, TILEGX_OPC_MUL_HS_HS,
+  TILEGX_OPC_MUL_HS_HU, TILEGX_OPC_MUL_HS_LS, TILEGX_OPC_MUL_HS_LU,
+  TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_HU_LS, TILEGX_OPC_MUL_HU_LU,
+  TILEGX_OPC_MUL_LS_LS, TILEGX_OPC_MUL_LS_LU, TILEGX_OPC_MUL_LU_LU,
+  TILEGX_OPC_MZ,
+  BITFIELD(18, 4) /* index 671 */,
+  TILEGX_OPC_NOR, CHILD(688), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL,
+  TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_SHUFFLEBYTES,
+  TILEGX_OPC_SUBXSC,
+  BITFIELD(12, 2) /* index 688 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(693),
+  BITFIELD(14, 2) /* index 693 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(698),
+  BITFIELD(16, 2) /* index 698 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(18, 4) /* index 703 */,
+  TILEGX_OPC_SUBX, TILEGX_OPC_SUB, CHILD(720), TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADIFFU, TILEGX_OPC_V1AVGU,
+  TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1DDOTPUSA, TILEGX_OPC_V1DDOTPUS, TILEGX_OPC_V1DOTPA,
+  BITFIELD(12, 4) /* index 720 */,
+  TILEGX_OPC_NONE, CHILD(737), CHILD(742), CHILD(747), CHILD(752), CHILD(757),
+  CHILD(762), CHILD(767), CHILD(772), CHILD(777), CHILD(782), CHILD(787),
+  CHILD(792), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 737 */,
+  TILEGX_OPC_CLZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 742 */,
+  TILEGX_OPC_CTZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 747 */,
+  TILEGX_OPC_FNOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 752 */,
+  TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 757 */,
+  TILEGX_OPC_NOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 762 */,
+  TILEGX_OPC_PCNT, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 767 */,
+  TILEGX_OPC_REVBITS, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 772 */,
+  TILEGX_OPC_REVBYTES, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 777 */,
+  TILEGX_OPC_TBLIDXB0, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 782 */,
+  TILEGX_OPC_TBLIDXB1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 787 */,
+  TILEGX_OPC_TBLIDXB2, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 792 */,
+  TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(18, 4) /* index 797 */,
+  TILEGX_OPC_V1DOTPUSA, TILEGX_OPC_V1DOTPUS, TILEGX_OPC_V1DOTP,
+  TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1MAXU,
+  TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MULTU, TILEGX_OPC_V1MULUS,
+  TILEGX_OPC_V1MULU, TILEGX_OPC_V1MZ, TILEGX_OPC_V1SADAU, TILEGX_OPC_V1SADU,
+  TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS,
+  BITFIELD(18, 4) /* index 814 */,
+  TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC,
+  TILEGX_OPC_V2ADD, TILEGX_OPC_V2ADIFFS, TILEGX_OPC_V2AVGS,
+  TILEGX_OPC_V2CMPEQ, TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU,
+  TILEGX_OPC_V2CMPLTS, TILEGX_OPC_V2CMPLTU, TILEGX_OPC_V2CMPNE,
+  TILEGX_OPC_V2DOTPA, TILEGX_OPC_V2DOTP, TILEGX_OPC_V2INT_H,
+  BITFIELD(18, 4) /* index 831 */,
+  TILEGX_OPC_V2INT_L, TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ,
+  TILEGX_OPC_V2MULFSC, TILEGX_OPC_V2MULS, TILEGX_OPC_V2MULTS, TILEGX_OPC_V2MZ,
+  TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC,
+  TILEGX_OPC_V2SADAS, TILEGX_OPC_V2SADAU, TILEGX_OPC_V2SADS,
+  TILEGX_OPC_V2SADU, TILEGX_OPC_V2SHLSC,
+  BITFIELD(18, 4) /* index 848 */,
+  TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, TILEGX_OPC_V2SUBSC,
+  TILEGX_OPC_V2SUB, TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H,
+  TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC,
+  TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC,
+  TILEGX_OPC_V4SUB,
+  BITFIELD(18, 3) /* index 865 */,
+  CHILD(874), CHILD(877), CHILD(880), CHILD(883), CHILD(886), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 874 */,
+  TILEGX_OPC_XOR, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 877 */,
+  TILEGX_OPC_V1DDOTPUA, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 880 */,
+  TILEGX_OPC_V1DDOTPU, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 883 */,
+  TILEGX_OPC_V1DOTPUA, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 886 */,
+  TILEGX_OPC_V1DOTPU, TILEGX_OPC_NONE,
+  BITFIELD(18, 4) /* index 889 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI,
+  TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI,
+  TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI,
+  TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE,
+  BITFIELD(0, 2) /* index 906 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(911),
+  BITFIELD(2, 2) /* index 911 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(916),
+  BITFIELD(4, 2) /* index 916 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(921),
+  BITFIELD(6, 2) /* index 921 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(926),
+  BITFIELD(8, 2) /* index 926 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(931),
+  BITFIELD(10, 2) /* index 931 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  TILEGX_OPC_INFOL,
+};
+/* decode_X1_fsm: flattened decision structure that maps instruction bits to
+ * a TILEGX_OPC_* opcode (for the X1 pipeline slot, judging by the name).
+ *
+ * Layout, as shown by the "index N" annotations: every node is a single
+ * BITFIELD(a, b) entry immediately followed by 2^b slots.  The root node
+ * (b = 9) therefore occupies entries 0..512 and the next node starts at
+ * index 513; the last node starts at 1261 and has 4 slots, which gives the
+ * declared total of 1266 entries.
+ *
+ * A slot holds either a final TILEGX_OPC_* value (TILEGX_OPC_NONE included)
+ * or CHILD(k), where k is the array index of another node's BITFIELD entry.
+ * Nodes can be shared: node 653 is the target of slots in nodes 648, 673
+ * and 678.
+ *
+ * Runs of 2-bit nodes such as 513 -> 518 -> 523 give the same opcode (ADDLI)
+ * in every slot except the last one, which chains to the next node; only
+ * the last slot of the final node yields a different opcode (MOVELI).
+ *
+ * NOTE(review): BITFIELD and CHILD are defined outside this excerpt.
+ * Presumably BITFIELD(a, b) means "select b bits starting at bit a" and
+ * CHILD(k) means "continue decoding at node k" -- confirm against the
+ * macro definitions and the code that walks this table.
+ *
+ * Entries must not be inserted, removed or reordered by hand: the CHILD()
+ * targets and the index annotations depend on exact positions.
+ */
+static const unsigned short decode_X1_fsm[1266] =
+{
+  BITFIELD(53, 9) /* index 0 */,
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BEQZT,
+  TILEGX_OPC_BEQZT, TILEGX_OPC_BEQZ, TILEGX_OPC_BEQZ, TILEGX_OPC_BGEZT,
+  TILEGX_OPC_BGEZT, TILEGX_OPC_BGEZ, TILEGX_OPC_BGEZ, TILEGX_OPC_BGTZT,
+  TILEGX_OPC_BGTZT, TILEGX_OPC_BGTZ, TILEGX_OPC_BGTZ, TILEGX_OPC_BLBCT,
+  TILEGX_OPC_BLBCT, TILEGX_OPC_BLBC, TILEGX_OPC_BLBC, TILEGX_OPC_BLBST,
+  TILEGX_OPC_BLBST, TILEGX_OPC_BLBS, TILEGX_OPC_BLBS, TILEGX_OPC_BLEZT,
+  TILEGX_OPC_BLEZT, TILEGX_OPC_BLEZ, TILEGX_OPC_BLEZ, TILEGX_OPC_BLTZT,
+  TILEGX_OPC_BLTZT, TILEGX_OPC_BLTZ, TILEGX_OPC_BLTZ, TILEGX_OPC_BNEZT,
+  TILEGX_OPC_BNEZT, TILEGX_OPC_BNEZ, TILEGX_OPC_BNEZ, CHILD(528), CHILD(578),
+  CHILD(598), CHILD(703), CHILD(723), CHILD(728), CHILD(753), CHILD(758),
+  CHILD(763), CHILD(768), CHILD(773), CHILD(778), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  CHILD(783), CHILD(800), CHILD(832), CHILD(849), CHILD(1168), CHILD(1185),
+  CHILD(1202), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1219), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236),
+  BITFIELD(37, 2) /* index 513 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518),
+  BITFIELD(39, 2) /* index 518 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523),
+  BITFIELD(41, 2) /* index 523 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI,
+  BITFIELD(51, 2) /* index 528 */,
+  TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548),
+  BITFIELD(37, 2) /* index 533 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538),
+  BITFIELD(39, 2) /* index 538 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543),
+  BITFIELD(41, 2) /* index 543 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(31, 2) /* index 548 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553),
+  BITFIELD(33, 2) /* index 553 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558),
+  BITFIELD(35, 2) /* index 558 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563),
+  BITFIELD(37, 2) /* index 563 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568),
+  BITFIELD(39, 2) /* index 568 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573),
+  BITFIELD(41, 2) /* index 573 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(51, 2) /* index 578 */,
+  TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, CHILD(583),
+  BITFIELD(31, 2) /* index 583 */,
+  TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(588),
+  BITFIELD(33, 2) /* index 588 */,
+  TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(593),
+  BITFIELD(35, 2) /* index 593 */,
+  TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L1_FAULT,
+  BITFIELD(51, 2) /* index 598 */,
+  CHILD(603), CHILD(618), CHILD(633), CHILD(648),
+  BITFIELD(31, 2) /* index 603 */,
+  TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(608),
+  BITFIELD(33, 2) /* index 608 */,
+  TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(613),
+  BITFIELD(35, 2) /* index 613 */,
+  TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L1,
+  BITFIELD(31, 2) /* index 618 */,
+  TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(623),
+  BITFIELD(33, 2) /* index 623 */,
+  TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(628),
+  BITFIELD(35, 2) /* index 628 */,
+  TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L2_FAULT,
+  BITFIELD(31, 2) /* index 633 */,
+  TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(638),
+  BITFIELD(33, 2) /* index 638 */,
+  TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(643),
+  BITFIELD(35, 2) /* index 643 */,
+  TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L2,
+  BITFIELD(31, 2) /* index 648 */,
+  CHILD(653), CHILD(653), CHILD(653), CHILD(673),
+  BITFIELD(43, 2) /* index 653 */,
+  CHILD(658), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  BITFIELD(45, 2) /* index 658 */,
+  CHILD(663), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  BITFIELD(47, 2) /* index 663 */,
+  CHILD(668), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  BITFIELD(49, 2) /* index 668 */,
+  TILEGX_OPC_LD4S_TLS, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  TILEGX_OPC_LD4S_ADD,
+  BITFIELD(33, 2) /* index 673 */,
+  CHILD(653), CHILD(653), CHILD(653), CHILD(678),
+  BITFIELD(35, 2) /* index 678 */,
+  CHILD(653), CHILD(653), CHILD(653), CHILD(683),
+  BITFIELD(43, 2) /* index 683 */,
+  CHILD(688), TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(45, 2) /* index 688 */,
+  CHILD(693), TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(47, 2) /* index 693 */,
+  CHILD(698), TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(49, 2) /* index 698 */,
+  TILEGX_OPC_LD4S_TLS, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(51, 2) /* index 703 */,
+  CHILD(708), TILEGX_OPC_LDNT1S_ADD, TILEGX_OPC_LDNT1U_ADD,
+  TILEGX_OPC_LDNT2S_ADD,
+  BITFIELD(31, 2) /* index 708 */,
+  TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(713),
+  BITFIELD(33, 2) /* index 713 */,
+  TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(718),
+  BITFIELD(35, 2) /* index 718 */,
+  TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L3,
+  BITFIELD(51, 2) /* index 723 */,
+  TILEGX_OPC_LDNT2U_ADD, TILEGX_OPC_LDNT4S_ADD, TILEGX_OPC_LDNT4U_ADD,
+  TILEGX_OPC_LDNT_ADD,
+  BITFIELD(51, 2) /* index 728 */,
+  CHILD(733), TILEGX_OPC_LDNA_ADD, TILEGX_OPC_MFSPR, TILEGX_OPC_MTSPR,
+  BITFIELD(43, 2) /* index 733 */,
+  CHILD(738), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(45, 2) /* index 738 */,
+  CHILD(743), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(47, 2) /* index 743 */,
+  CHILD(748), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(49, 2) /* index 748 */,
+  TILEGX_OPC_LD_TLS, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(51, 2) /* index 753 */,
+  TILEGX_OPC_ORI, TILEGX_OPC_ST1_ADD, TILEGX_OPC_ST2_ADD, TILEGX_OPC_ST4_ADD,
+  BITFIELD(51, 2) /* index 758 */,
+  TILEGX_OPC_STNT1_ADD, TILEGX_OPC_STNT2_ADD, TILEGX_OPC_STNT4_ADD,
+  TILEGX_OPC_STNT_ADD,
+  BITFIELD(51, 2) /* index 763 */,
+  TILEGX_OPC_ST_ADD, TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI,
+  TILEGX_OPC_V1CMPLTSI,
+  BITFIELD(51, 2) /* index 768 */,
+  TILEGX_OPC_V1CMPLTUI, TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI,
+  TILEGX_OPC_V2ADDI,
+  BITFIELD(51, 2) /* index 773 */,
+  TILEGX_OPC_V2CMPEQI, TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI,
+  TILEGX_OPC_V2MAXSI,
+  BITFIELD(51, 2) /* index 778 */,
+  TILEGX_OPC_V2MINSI, TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(49, 4) /* index 783 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD,
+  TILEGX_OPC_AND, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPEXCH4, TILEGX_OPC_CMPEXCH,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  TILEGX_OPC_CMPNE, TILEGX_OPC_DBLALIGN2, TILEGX_OPC_DBLALIGN4,
+  TILEGX_OPC_DBLALIGN6,
+  BITFIELD(49, 4) /* index 800 */,
+  TILEGX_OPC_EXCH4, TILEGX_OPC_EXCH, TILEGX_OPC_FETCHADD4,
+  TILEGX_OPC_FETCHADDGEZ4, TILEGX_OPC_FETCHADDGEZ, TILEGX_OPC_FETCHADD,
+  TILEGX_OPC_FETCHAND4, TILEGX_OPC_FETCHAND, TILEGX_OPC_FETCHOR4,
+  TILEGX_OPC_FETCHOR, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, TILEGX_OPC_NOR,
+  CHILD(817), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX,
+  BITFIELD(43, 2) /* index 817 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(822),
+  BITFIELD(45, 2) /* index 822 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(827),
+  BITFIELD(47, 2) /* index 827 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(49, 4) /* index 832 */,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL,
+  TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_ST1,
+  TILEGX_OPC_ST2, TILEGX_OPC_ST4, TILEGX_OPC_STNT1, TILEGX_OPC_STNT2,
+  TILEGX_OPC_STNT4,
+  BITFIELD(46, 7) /* index 849 */,
+  TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT,
+  TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT,
+  TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST,
+  TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_SUBXSC,
+  TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC,
+  TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBX,
+  TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX,
+  TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+  TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB,
+  TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, CHILD(978), CHILD(987),
+  CHILD(1066), CHILD(1150), CHILD(1159), TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD,
+  TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD,
+  TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES,
+  TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES,
+  TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU,
+  TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU,
+  TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+  TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+  TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+  BITFIELD(43, 3) /* index 978 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_DRAIN, TILEGX_OPC_DTLBPR, TILEGX_OPC_FINV,
+  TILEGX_OPC_FLUSHWB, TILEGX_OPC_FLUSH, TILEGX_OPC_FNOP, TILEGX_OPC_ICOH,
+  BITFIELD(43, 3) /* index 987 */,
+  CHILD(996), TILEGX_OPC_INV, TILEGX_OPC_IRET, TILEGX_OPC_JALRP,
+  TILEGX_OPC_JALR, TILEGX_OPC_JRP, TILEGX_OPC_JR, CHILD(1051),
+  BITFIELD(31, 2) /* index 996 */,
+  CHILD(1001), CHILD(1026), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(33, 2) /* index 1001 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(1006),
+  BITFIELD(35, 2) /* index 1006 */,
+  TILEGX_OPC_ILL, CHILD(1011), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(37, 2) /* index 1011 */,
+  TILEGX_OPC_ILL, CHILD(1016), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(39, 2) /* index 1016 */,
+  TILEGX_OPC_ILL, CHILD(1021), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(41, 2) /* index 1021 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_BPT, TILEGX_OPC_ILL,
+  BITFIELD(33, 2) /* index 1026 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(1031),
+  BITFIELD(35, 2) /* index 1031 */,
+  TILEGX_OPC_ILL, CHILD(1036), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(37, 2) /* index 1036 */,
+  TILEGX_OPC_ILL, CHILD(1041), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(39, 2) /* index 1041 */,
+  TILEGX_OPC_ILL, CHILD(1046), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(41, 2) /* index 1046 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_RAISE, TILEGX_OPC_ILL,
+  BITFIELD(31, 2) /* index 1051 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1056),
+  BITFIELD(33, 2) /* index 1056 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1061),
+  BITFIELD(35, 2) /* index 1061 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S,
+  TILEGX_OPC_PREFETCH_L1_FAULT,
+  BITFIELD(43, 3) /* index 1066 */,
+  CHILD(1075), CHILD(1090), CHILD(1105), CHILD(1120), CHILD(1135),
+  TILEGX_OPC_LDNA, TILEGX_OPC_LDNT1S, TILEGX_OPC_LDNT1U,
+  BITFIELD(31, 2) /* index 1075 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1080),
+  BITFIELD(33, 2) /* index 1080 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1085),
+  BITFIELD(35, 2) /* index 1085 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH,
+  BITFIELD(31, 2) /* index 1090 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1095),
+  BITFIELD(33, 2) /* index 1095 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1100),
+  BITFIELD(35, 2) /* index 1100 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S,
+  TILEGX_OPC_PREFETCH_L2_FAULT,
+  BITFIELD(31, 2) /* index 1105 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1110),
+  BITFIELD(33, 2) /* index 1110 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1115),
+  BITFIELD(35, 2) /* index 1115 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2,
+  BITFIELD(31, 2) /* index 1120 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1125),
+  BITFIELD(33, 2) /* index 1125 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1130),
+  BITFIELD(35, 2) /* index 1130 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S,
+  TILEGX_OPC_PREFETCH_L3_FAULT,
+  BITFIELD(31, 2) /* index 1135 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1140),
+  BITFIELD(33, 2) /* index 1140 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1145),
+  BITFIELD(35, 2) /* index 1145 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3,
+  BITFIELD(43, 3) /* index 1150 */,
+  TILEGX_OPC_LDNT2S, TILEGX_OPC_LDNT2U, TILEGX_OPC_LDNT4S, TILEGX_OPC_LDNT4U,
+  TILEGX_OPC_LDNT, TILEGX_OPC_LD, TILEGX_OPC_LNK, TILEGX_OPC_MF,
+  BITFIELD(43, 3) /* index 1159 */,
+  TILEGX_OPC_NAP, TILEGX_OPC_NOP, TILEGX_OPC_SWINT0, TILEGX_OPC_SWINT1,
+  TILEGX_OPC_SWINT2, TILEGX_OPC_SWINT3, TILEGX_OPC_WH64, TILEGX_OPC_NONE,
+  BITFIELD(49, 4) /* index 1168 */,
+  TILEGX_OPC_V1MAXU, TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MZ,
+  TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC,
+  TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, TILEGX_OPC_V2ADD, TILEGX_OPC_V2CMPEQ,
+  TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, TILEGX_OPC_V2CMPLTS,
+  TILEGX_OPC_V2CMPLTU,
+  BITFIELD(49, 4) /* index 1185 */,
+  TILEGX_OPC_V2CMPNE, TILEGX_OPC_V2INT_H, TILEGX_OPC_V2INT_L,
+  TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, TILEGX_OPC_V2MZ,
+  TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC,
+  TILEGX_OPC_V2SHLSC, TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU,
+  TILEGX_OPC_V2SUBSC, TILEGX_OPC_V2SUB,
+  BITFIELD(49, 4) /* index 1202 */,
+  TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H,
+  TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC,
+  TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC,
+  TILEGX_OPC_V4SUB, TILEGX_OPC_XOR, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(49, 4) /* index 1219 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI,
+  TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI,
+  TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI,
+  TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE,
+  BITFIELD(31, 2) /* index 1236 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1241),
+  BITFIELD(33, 2) /* index 1241 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1246),
+  BITFIELD(35, 2) /* index 1246 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1251),
+  BITFIELD(37, 2) /* index 1251 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1256),
+  BITFIELD(39, 2) /* index 1256 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1261),
+  BITFIELD(41, 2) /* index 1261 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  TILEGX_OPC_INFOL,
+};
+/* decode_Y0_fsm: flattened decision tree that maps instruction bits to a
+ * TILEGX_OPC_* opcode (for the Y0 pipeline slot, judging by the name).
+ *
+ * Layout, as shown by the "index N" annotations: every node is a single
+ * BITFIELD(a, b) entry immediately followed by 2^b slots.  The root node
+ * (b = 4) therefore occupies entries 0..16 and the next node starts at
+ * index 17; the last node starts at 173 and has 4 slots, which gives the
+ * declared total of 178 entries.
+ *
+ * A slot holds either a final TILEGX_OPC_* value (TILEGX_OPC_NONE included)
+ * or CHILD(k), where k is the array index of another node's BITFIELD entry.
+ *
+ * Runs of 2-bit nodes such as 17 -> 22 -> 27 give the same opcode (ADDI) in
+ * every slot except the last one, which chains to the next node; only the
+ * last slot of the final node yields a different opcode (MOVEI).
+ *
+ * NOTE(review): BITFIELD and CHILD are defined outside this excerpt.
+ * Presumably BITFIELD(a, b) means "select b bits starting at bit a" and
+ * CHILD(k) means "continue decoding at node k" -- confirm against the
+ * macro definitions and the code that walks this table.
+ *
+ * Entries must not be inserted, removed or reordered by hand: the CHILD()
+ * targets and the index annotations depend on exact positions.
+ */
+static const unsigned short decode_Y0_fsm[178] =
+{
+  BITFIELD(27, 4) /* index 0 */,
+  CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI,
+  TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(118), CHILD(123),
+  CHILD(128), CHILD(133), CHILD(153), CHILD(158), CHILD(163), CHILD(168),
+  CHILD(173),
+  BITFIELD(6, 2) /* index 17 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22),
+  BITFIELD(8, 2) /* index 22 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27),
+  BITFIELD(10, 2) /* index 27 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(0, 2) /* index 32 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37),
+  BITFIELD(2, 2) /* index 37 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42),
+  BITFIELD(4, 2) /* index 42 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47),
+  BITFIELD(6, 2) /* index 47 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52),
+  BITFIELD(8, 2) /* index 52 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57),
+  BITFIELD(10, 2) /* index 57 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(18, 2) /* index 62 */,
+  TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+  BITFIELD(15, 5) /* index 67 */,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(100),
+  CHILD(109), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(12, 3) /* index 100 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_CLZ, TILEGX_OPC_CTZ, TILEGX_OPC_FNOP,
+  TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NOP, TILEGX_OPC_PCNT,
+  TILEGX_OPC_REVBITS,
+  BITFIELD(12, 3) /* index 109 */,
+  TILEGX_OPC_REVBYTES, TILEGX_OPC_TBLIDXB0, TILEGX_OPC_TBLIDXB1,
+  TILEGX_OPC_TBLIDXB2, TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE,
+  BITFIELD(18, 2) /* index 118 */,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  BITFIELD(18, 2) /* index 123 */,
+  TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, TILEGX_OPC_MULAX, TILEGX_OPC_MULX,
+  BITFIELD(18, 2) /* index 128 */,
+  TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_MNZ, TILEGX_OPC_MZ,
+  BITFIELD(18, 2) /* index 133 */,
+  TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(138), TILEGX_OPC_XOR,
+  BITFIELD(12, 2) /* index 138 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(143),
+  BITFIELD(14, 2) /* index 143 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(148),
+  BITFIELD(16, 2) /* index 148 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(18, 2) /* index 153 */,
+  TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU,
+  BITFIELD(18, 2) /* index 158 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX,
+  TILEGX_OPC_SHL3ADDX,
+  BITFIELD(18, 2) /* index 163 */,
+  TILEGX_OPC_MUL_HS_HS, TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_LS_LS,
+  TILEGX_OPC_MUL_LU_LU,
+  BITFIELD(18, 2) /* index 168 */,
+  TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_LS_LS,
+  TILEGX_OPC_MULA_LU_LU,
+  BITFIELD(18, 2) /* index 173 */,
+  TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI,
+};
+
+/* Decoder state machine for the Y1 pipeline slot; find_opcode() walks it
+ * via tilegx_bundle_decoder_fsms[].
+ *
+ * The array is a flattened decision tree.  Each node is one BITFIELD(...)
+ * word followed by its entries; the "index N" comment after a BITFIELD
+ * gives that node's offset in the array, and CHILD(N) entries link to the
+ * node at offset N.  Every other entry is a TILEGX_OPC_* mnemonic (a leaf);
+ * TILEGX_OPC_NONE marks encodings with no instruction.
+ *
+ * NOTE(review): BITFIELD and CHILD are defined outside this excerpt.  Judging
+ * by find_opcode() and by the entry counts below, BITFIELD(start, size)
+ * presumably selects `size` bits starting at bundle bit `start`, so a node
+ * is followed by 2^size entries -- confirm against the macro definitions.
+ * The array length and every CHILD() offset must be updated together if a
+ * node is ever added or resized.
+ */
+static const unsigned short decode_Y1_fsm[167] =
+{
+  BITFIELD(58, 4) /* index 0 */,
+  TILEGX_OPC_NONE, CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI,
+  TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(117), CHILD(122),
+  CHILD(127), CHILD(132), CHILD(152), CHILD(157), CHILD(162), TILEGX_OPC_NONE,
+  BITFIELD(37, 2) /* index 17 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22),
+  BITFIELD(39, 2) /* index 22 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27),
+  BITFIELD(41, 2) /* index 27 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(31, 2) /* index 32 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37),
+  BITFIELD(33, 2) /* index 37 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42),
+  BITFIELD(35, 2) /* index 42 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47),
+  BITFIELD(37, 2) /* index 47 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52),
+  BITFIELD(39, 2) /* index 52 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57),
+  BITFIELD(41, 2) /* index 57 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(49, 2) /* index 62 */,
+  TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+  BITFIELD(47, 4) /* index 67 */,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(84),
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(43, 3) /* index 84 */,
+  CHILD(93), CHILD(96), CHILD(99), CHILD(102), CHILD(105), CHILD(108),
+  CHILD(111), CHILD(114),
+  BITFIELD(46, 1) /* index 93 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_FNOP,
+  BITFIELD(46, 1) /* index 96 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ILL,
+  BITFIELD(46, 1) /* index 99 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JALRP,
+  BITFIELD(46, 1) /* index 102 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JALR,
+  BITFIELD(46, 1) /* index 105 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JRP,
+  BITFIELD(46, 1) /* index 108 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JR,
+  BITFIELD(46, 1) /* index 111 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_LNK,
+  BITFIELD(46, 1) /* index 114 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NOP,
+  BITFIELD(49, 2) /* index 117 */,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  BITFIELD(49, 2) /* index 122 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE,
+  BITFIELD(49, 2) /* index 127 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_MNZ, TILEGX_OPC_MZ,
+  BITFIELD(49, 2) /* index 132 */,
+  TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(137), TILEGX_OPC_XOR,
+  BITFIELD(43, 2) /* index 137 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(142),
+  BITFIELD(45, 2) /* index 142 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(147),
+  BITFIELD(47, 2) /* index 147 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(49, 2) /* index 152 */,
+  TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU,
+  BITFIELD(49, 2) /* index 157 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX,
+  TILEGX_OPC_SHL3ADDX,
+  BITFIELD(49, 2) /* index 162 */,
+  TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI,
+};
+
+/* Decoder state machine for the Y2 pipeline slot; find_opcode() walks it
+ * via tilegx_bundle_decoder_fsms[].  Every leaf in this table is a load,
+ * store or prefetch mnemonic (or TILEGX_OPC_NONE).
+ *
+ * Layout: a flattened decision tree.  Each node is one BITFIELD(...) word
+ * followed by its entries; the "index N" comment gives the node's offset in
+ * the array, and CHILD(N) entries link to the node at offset N.
+ *
+ * NOTE(review): BITFIELD and CHILD are defined outside this excerpt;
+ * BITFIELD(start, size) presumably selects `size` bits at bundle bit
+ * `start` (each node below is followed by 2^size entries) -- confirm.
+ */
+static const unsigned short decode_Y2_fsm[118] =
+{
+  BITFIELD(62, 2) /* index 0 */,
+  TILEGX_OPC_NONE, CHILD(5), CHILD(66), CHILD(109),
+  BITFIELD(55, 3) /* index 5 */,
+  CHILD(14), CHILD(14), CHILD(14), CHILD(17), CHILD(40), CHILD(40), CHILD(40),
+  CHILD(43),
+  BITFIELD(26, 1) /* index 14 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1U,
+  BITFIELD(26, 1) /* index 17 */,
+  CHILD(20), CHILD(30),
+  BITFIELD(51, 2) /* index 20 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(25),
+  BITFIELD(53, 2) /* index 25 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S,
+  TILEGX_OPC_PREFETCH_L1_FAULT,
+  BITFIELD(51, 2) /* index 30 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(35),
+  BITFIELD(53, 2) /* index 35 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH,
+  BITFIELD(26, 1) /* index 40 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2U,
+  BITFIELD(26, 1) /* index 43 */,
+  CHILD(46), CHILD(56),
+  BITFIELD(51, 2) /* index 46 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(51),
+  BITFIELD(53, 2) /* index 51 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S,
+  TILEGX_OPC_PREFETCH_L2_FAULT,
+  BITFIELD(51, 2) /* index 56 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(61),
+  BITFIELD(53, 2) /* index 61 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2,
+  BITFIELD(56, 2) /* index 66 */,
+  CHILD(71), CHILD(74), CHILD(90), CHILD(93),
+  BITFIELD(26, 1) /* index 71 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_LD4S,
+  BITFIELD(26, 1) /* index 74 */,
+  TILEGX_OPC_NONE, CHILD(77),
+  BITFIELD(51, 2) /* index 77 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(82),
+  BITFIELD(53, 2) /* index 82 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(87),
+  BITFIELD(55, 1) /* index 87 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_PREFETCH_L3_FAULT,
+  BITFIELD(26, 1) /* index 90 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD,
+  BITFIELD(26, 1) /* index 93 */,
+  CHILD(96), TILEGX_OPC_LD,
+  BITFIELD(51, 2) /* index 96 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(101),
+  BITFIELD(53, 2) /* index 101 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(106),
+  BITFIELD(55, 1) /* index 106 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3,
+  BITFIELD(26, 1) /* index 109 */,
+  CHILD(112), CHILD(115),
+  BITFIELD(57, 1) /* index 112 */,
+  TILEGX_OPC_ST1, TILEGX_OPC_ST4,
+  BITFIELD(57, 1) /* index 115 */,
+  TILEGX_OPC_ST2, TILEGX_OPC_ST,
+};
+
+#undef BITFIELD
+#undef CHILD
+
+/* Per-pipeline decoder state machines, indexed by tilegx_pipeline value
+ * (find_opcode() reads tilegx_bundle_decoder_fsms[pipe]).
+ * NOTE(review): the initializer order X0, X1, Y0, Y1, Y2 must match the
+ * numeric order of the TILEGX_PIPELINE_* enumerators, which are declared
+ * outside this excerpt -- confirm before reordering either.
+ */
+const unsigned short * const
+tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS] =
+{
+  decode_X0_fsm,
+  decode_X1_fsm,
+  decode_Y0_fsm,
+  decode_Y1_fsm,
+  decode_Y2_fsm
+};
+
+/* Operand descriptor table.  Entries are referenced by position:
+ * parse_insn_tilegx() looks up tilegx_operands[opc->operands[pipe][i]],
+ * so entries must not be reordered, inserted or removed without updating
+ * every index stored in the opcode table.
+ *
+ * Each entry names the operand type, a BFD_RELOC(...) relocation, six
+ * integer attributes, and a create_* / get_* accessor pair for the operand's
+ * bit field.
+ * NOTE(review): struct tilegx_operand is declared outside this excerpt.
+ * From its use in parse_insn_tilegx() it has `type`, `num_bits`,
+ * `is_signed` and `extract` members; the first two integers of each entry
+ * look like the field width in bits and the signedness flag, and the next
+ * two like "is source register" / "is destination register" flags --
+ * confirm against the struct declaration.
+ */
+const struct tilegx_operand tilegx_operands[35] =
+{
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X0),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_X0, get_Imm8_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X1),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_X1, get_Imm8_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y0),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_Y0, get_Imm8_Y0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y1),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_Y1, get_Imm8_Y1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X0_HW0_LAST),
+    16, 1, 0, 0, 0, 0,
+    create_Imm16_X0, get_Imm16_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X1_HW0_LAST),
+    16, 1, 0, 0, 0, 0,
+    create_Imm16_X1, get_Imm16_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_X1, get_Dest_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_X1, get_SrcA_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_X0, get_Dest_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_X0, get_SrcA_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_Y0, get_Dest_Y0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_Y0, get_SrcA_Y0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_Y1, get_Dest_Y1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_Y1, get_SrcA_Y1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_Y2, get_SrcA_Y2
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 1, 0, 0,
+    create_SrcA_X1, get_SrcA_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_X0, get_SrcB_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_X1, get_SrcB_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_Y0, get_SrcB_Y0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_Y1, get_SrcB_Y1
+  },
+  {
+    TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_BROFF_X1),
+    17, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+    create_BrOff_X1, get_BrOff_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMSTART_X0),
+    6, 0, 0, 0, 0, 0,
+    create_BFStart_X0, get_BFStart_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMEND_X0),
+    6, 0, 0, 0, 0, 0,
+    create_BFEnd_X0, get_BFEnd_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 1, 0, 0,
+    create_Dest_X0, get_Dest_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 1, 0, 0,
+    create_Dest_Y0, get_Dest_Y0
+  },
+  {
+    TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_JUMPOFF_X1),
+    27, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+    create_JumpOff_X1, get_JumpOff_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_SrcBDest_Y2, get_SrcBDest_Y2
+  },
+  {
+    TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MF_IMM14_X1),
+    14, 0, 0, 0, 0, 0,
+    create_MF_Imm14_X1, get_MF_Imm14_X1
+  },
+  {
+    TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MT_IMM14_X1),
+    14, 0, 0, 0, 0, 0,
+    create_MT_Imm14_X1, get_MT_Imm14_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X0),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_X0, get_ShAmt_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X1),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_X1, get_ShAmt_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y0),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_Y0, get_ShAmt_Y0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y1),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_Y1, get_ShAmt_Y1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcBDest_Y2, get_SrcBDest_Y2
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_DEST_IMM8_X1),
+    8, 1, 0, 0, 0, 0,
+    create_Dest_Imm8_X1, get_Dest_Imm8_X1
+  }
+};
+
+/* Decode the instruction held in one pipeline slot of a bundle.
+ *
+ * Walks the decoder state machine selected by `pipe`, starting at node 0.
+ * At each node a bit field of `bits` picks one of the node's entries; an
+ * entry no greater than TILEGX_OPC_NONE is a mnemonic and ends the walk,
+ * anything larger is a link to another node.
+ *
+ * Returns a pointer into tilegx_opcodes[] for the decoded mnemonic (the
+ * TILEGX_OPC_NONE entry when the encoding matches no instruction).
+ */
+const struct tilegx_opcode *
+find_opcode(tilegx_bundle_bits bits, tilegx_pipeline pipe)
+{
+  const unsigned short *fsm = tilegx_bundle_decoder_fsms[pipe];
+  int node = 0;
+
+  for (;;)
+  {
+    /* Node header: the low 6 bits hold the right-shift amount, the
+       remaining high bits hold the mask applied after shifting. */
+    const unsigned short spec = fsm[node];
+    const unsigned int shift = spec & 63;
+    const unsigned int mask = spec >> 6;
+    const unsigned int selector = ((unsigned int)(bits >> shift)) & mask;
+
+    /* Entries follow the header word directly. */
+    const unsigned short entry = fsm[node + 1 + selector];
+
+    if (entry > TILEGX_OPC_NONE)
+    {
+      /* Link: node offsets are stored biased by TILEGX_OPC_NONE. */
+      node = entry - TILEGX_OPC_NONE;
+      continue;
+    }
+
+    return &tilegx_opcodes[entry];
+  }
+}
+
+/* Decode every instruction in a bundle.
+ *
+ * bits    - the raw bundle.
+ * pc      - address of the bundle; added to scaled branch/jump offsets so
+ *           that TILEGX_OP_TYPE_ADDRESS operands come out as absolute
+ *           addresses.
+ * decoded - output array; one element is filled per pipeline slot.
+ *
+ * A bundle whose TILEGX_BUNDLE_MODE_MASK bits are clear is decoded as the
+ * X0..X1 pipelines, otherwise as Y0..Y2.  Returns the number of elements
+ * written to `decoded`.
+ */
+int
+parse_insn_tilegx(tilegx_bundle_bits bits,
+                  unsigned long long pc,
+                  struct tilegx_decoded_instruction
+                  decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE])
+{
+  int num_instructions = 0;
+  int pipe;
+
+  int min_pipe, max_pipe;
+  if ((bits & TILEGX_BUNDLE_MODE_MASK) == 0)
+  {
+    min_pipe = TILEGX_PIPELINE_X0;
+    max_pipe = TILEGX_PIPELINE_X1;
+  }
+  else
+  {
+    min_pipe = TILEGX_PIPELINE_Y0;
+    max_pipe = TILEGX_PIPELINE_Y2;
+  }
+
+  /* For each pipe, find an instruction that fits. */
+  for (pipe = min_pipe; pipe <= max_pipe; pipe++)
+  {
+    const struct tilegx_opcode *opc;
+    struct tilegx_decoded_instruction *d;
+    int i;
+
+    d = &decoded[num_instructions++];
+    opc = find_opcode (bits, (tilegx_pipeline)pipe);
+    d->opcode = opc;
+
+    /* Decode each operand, sign extending, etc. as appropriate. */
+    for (i = 0; i < opc->num_operands; i++)
+    {
+      const struct tilegx_operand *op =
+        &tilegx_operands[opc->operands[pipe][i]];
+      int raw_opval = op->extract (bits);
+      long long opval;
+
+      if (op->is_signed)
+      {
+        /* Sign-extend the num_bits-wide field.  This is done with a mask
+           and a subtraction rather than "(x << s) >> s": left-shifting a
+           signed int into or past the sign bit is undefined behaviour and
+           right-shifting a negative value is implementation-defined.
+           A width of 0 or of the full int width needs no extension. */
+        const unsigned int width = (unsigned int)op->num_bits;
+
+        if (width > 0 && width < sizeof(int) * 8)
+        {
+          const unsigned int sign_bit = 1u << (width - 1);
+          const unsigned int field =
+            (unsigned int)raw_opval & ((sign_bit << 1) - 1u);
+          long long extended = (long long)field;
+
+          if (field & sign_bit)
+            extended -= (long long)sign_bit << 1;
+
+          /* Always within [-2^(width-1), 2^(width-1) - 1], so it fits. */
+          raw_opval = (int)extended;
+        }
+      }
+
+      /* Adjust PC-relative scaled branch offsets. */
+      if (op->type == TILEGX_OP_TYPE_ADDRESS)
+        opval = (raw_opval * TILEGX_BUNDLE_SIZE_IN_BYTES) + pc;
+      else
+        opval = raw_opval;
+
+      /* Record the final value. */
+      d->operands[i] = op;
+      d->operand_values[i] = opval;
+    }
+  }
+
+  return num_instructions;
+}
+
+/* One entry of the special-purpose-register name table (tilegx_sprs). */
+struct tilegx_spr
+{
+  /* SPR number; this is the key tilegx_spr_compare() orders entries by. */
+  int number;
+
+  /* Symbolic name returned by get_tilegx_spr_name() for this number. */
+  const char *name;
+};
+
+/* bsearch() comparator: orders two struct tilegx_spr by their `number`.
+ * Returns a negative, zero or positive value when *a_ptr sorts before,
+ * equal to, or after *b_ptr.
+ *
+ * The result is built from two comparisons instead of a subtraction
+ * because "a - b" overflows (undefined behaviour) when the operands have
+ * opposite signs and large magnitude; the search key comes straight from
+ * the caller of get_tilegx_spr_name() and may be any int.
+ */
+static int
+tilegx_spr_compare (const void *a_ptr, const void *b_ptr)
+{
+  const struct tilegx_spr *a = (const struct tilegx_spr *) a_ptr;
+  const struct tilegx_spr *b = (const struct tilegx_spr *) b_ptr;
+  return (a->number > b->number) - (a->number < b->number);
+}
+
+/* Special-purpose-register number -> name table.
+ * Entries MUST stay sorted by ascending number: get_tilegx_spr_name()
+ * looks numbers up with bsearch() using tilegx_spr_compare().
+ */
+const struct tilegx_spr tilegx_sprs[] = {
+  { 0, "MPL_MEM_ERROR_SET_0" },
+  { 1, "MPL_MEM_ERROR_SET_1" },
+  { 2, "MPL_MEM_ERROR_SET_2" },
+  { 3, "MPL_MEM_ERROR_SET_3" },
+  { 4, "MPL_MEM_ERROR" },
+  { 5, "MEM_ERROR_CBOX_ADDR" },
+  { 6, "MEM_ERROR_CBOX_STATUS" },
+  { 7, "MEM_ERROR_ENABLE" },
+  { 8, "MEM_ERROR_MBOX_ADDR" },
+  { 9, "MEM_ERROR_MBOX_STATUS" },
+  { 10, "SBOX_ERROR" },
+  { 11, "XDN_DEMUX_ERROR" },
+  { 256, "MPL_SINGLE_STEP_3_SET_0" },
+  { 257, "MPL_SINGLE_STEP_3_SET_1" },
+  { 258, "MPL_SINGLE_STEP_3_SET_2" },
+  { 259, "MPL_SINGLE_STEP_3_SET_3" },
+  { 260, "MPL_SINGLE_STEP_3" },
+  { 261, "SINGLE_STEP_CONTROL_3" },
+  { 512, "MPL_SINGLE_STEP_2_SET_0" },
+  { 513, "MPL_SINGLE_STEP_2_SET_1" },
+  { 514, "MPL_SINGLE_STEP_2_SET_2" },
+  { 515, "MPL_SINGLE_STEP_2_SET_3" },
+  { 516, "MPL_SINGLE_STEP_2" },
+  { 517, "SINGLE_STEP_CONTROL_2" },
+  { 768, "MPL_SINGLE_STEP_1_SET_0" },
+  { 769, "MPL_SINGLE_STEP_1_SET_1" },
+  { 770, "MPL_SINGLE_STEP_1_SET_2" },
+  { 771, "MPL_SINGLE_STEP_1_SET_3" },
+  { 772, "MPL_SINGLE_STEP_1" },
+  { 773, "SINGLE_STEP_CONTROL_1" },
+  { 1024, "MPL_SINGLE_STEP_0_SET_0" },
+  { 1025, "MPL_SINGLE_STEP_0_SET_1" },
+  { 1026, "MPL_SINGLE_STEP_0_SET_2" },
+  { 1027, "MPL_SINGLE_STEP_0_SET_3" },
+  { 1028, "MPL_SINGLE_STEP_0" },
+  { 1029, "SINGLE_STEP_CONTROL_0" },
+  { 1280, "MPL_IDN_COMPLETE_SET_0" },
+  { 1281, "MPL_IDN_COMPLETE_SET_1" },
+  { 1282, "MPL_IDN_COMPLETE_SET_2" },
+  { 1283, "MPL_IDN_COMPLETE_SET_3" },
+  { 1284, "MPL_IDN_COMPLETE" },
+  { 1285, "IDN_COMPLETE_PENDING" },
+  { 1536, "MPL_UDN_COMPLETE_SET_0" },
+  { 1537, "MPL_UDN_COMPLETE_SET_1" },
+  { 1538, "MPL_UDN_COMPLETE_SET_2" },
+  { 1539, "MPL_UDN_COMPLETE_SET_3" },
+  { 1540, "MPL_UDN_COMPLETE" },
+  { 1541, "UDN_COMPLETE_PENDING" },
+  { 1792, "MPL_ITLB_MISS_SET_0" },
+  { 1793, "MPL_ITLB_MISS_SET_1" },
+  { 1794, "MPL_ITLB_MISS_SET_2" },
+  { 1795, "MPL_ITLB_MISS_SET_3" },
+  { 1796, "MPL_ITLB_MISS" },
+  { 1797, "ITLB_TSB_BASE_ADDR_0" },
+  { 1798, "ITLB_TSB_BASE_ADDR_1" },
+  { 1920, "ITLB_CURRENT_ATTR" },
+  { 1921, "ITLB_CURRENT_PA" },
+  { 1922, "ITLB_CURRENT_VA" },
+  { 1923, "ITLB_INDEX" },
+  { 1924, "ITLB_MATCH_0" },
+  { 1925, "ITLB_PERF" },
+  { 1926, "ITLB_PR" },
+  { 1927, "ITLB_TSB_ADDR_0" },
+  { 1928, "ITLB_TSB_ADDR_1" },
+  { 1929, "ITLB_TSB_FILL_CURRENT_ATTR" },
+  { 1930, "ITLB_TSB_FILL_MATCH" },
+  { 1931, "NUMBER_ITLB" },
+  { 1932, "REPLACEMENT_ITLB" },
+  { 1933, "WIRED_ITLB" },
+  { 2048, "MPL_ILL_SET_0" },
+  { 2049, "MPL_ILL_SET_1" },
+  { 2050, "MPL_ILL_SET_2" },
+  { 2051, "MPL_ILL_SET_3" },
+  { 2052, "MPL_ILL" },
+  { 2304, "MPL_GPV_SET_0" },
+  { 2305, "MPL_GPV_SET_1" },
+  { 2306, "MPL_GPV_SET_2" },
+  { 2307, "MPL_GPV_SET_3" },
+  { 2308, "MPL_GPV" },
+  { 2309, "GPV_REASON" },
+  { 2560, "MPL_IDN_ACCESS_SET_0" },
+  { 2561, "MPL_IDN_ACCESS_SET_1" },
+  { 2562, "MPL_IDN_ACCESS_SET_2" },
+  { 2563, "MPL_IDN_ACCESS_SET_3" },
+  { 2564, "MPL_IDN_ACCESS" },
+  { 2565, "IDN_DEMUX_COUNT_0" },
+  { 2566, "IDN_DEMUX_COUNT_1" },
+  { 2567, "IDN_FLUSH_EGRESS" },
+  { 2568, "IDN_PENDING" },
+  { 2569, "IDN_ROUTE_ORDER" },
+  { 2570, "IDN_SP_FIFO_CNT" },
+  { 2688, "IDN_DATA_AVAIL" },
+  { 2816, "MPL_UDN_ACCESS_SET_0" },
+  { 2817, "MPL_UDN_ACCESS_SET_1" },
+  { 2818, "MPL_UDN_ACCESS_SET_2" },
+  { 2819, "MPL_UDN_ACCESS_SET_3" },
+  { 2820, "MPL_UDN_ACCESS" },
+  { 2821, "UDN_DEMUX_COUNT_0" },
+  { 2822, "UDN_DEMUX_COUNT_1" },
+  { 2823, "UDN_DEMUX_COUNT_2" },
+  { 2824, "UDN_DEMUX_COUNT_3" },
+  { 2825, "UDN_FLUSH_EGRESS" },
+  { 2826, "UDN_PENDING" },
+  { 2827, "UDN_ROUTE_ORDER" },
+  { 2828, "UDN_SP_FIFO_CNT" },
+  { 2944, "UDN_DATA_AVAIL" },
+  { 3072, "MPL_SWINT_3_SET_0" },
+  { 3073, "MPL_SWINT_3_SET_1" },
+  { 3074, "MPL_SWINT_3_SET_2" },
+  { 3075, "MPL_SWINT_3_SET_3" },
+  { 3076, "MPL_SWINT_3" },
+  { 3328, "MPL_SWINT_2_SET_0" },
+  { 3329, "MPL_SWINT_2_SET_1" },
+  { 3330, "MPL_SWINT_2_SET_2" },
+  { 3331, "MPL_SWINT_2_SET_3" },
+  { 3332, "MPL_SWINT_2" },
+  { 3584, "MPL_SWINT_1_SET_0" },
+  { 3585, "MPL_SWINT_1_SET_1" },
+  { 3586, "MPL_SWINT_1_SET_2" },
+  { 3587, "MPL_SWINT_1_SET_3" },
+  { 3588, "MPL_SWINT_1" },
+  { 3840, "MPL_SWINT_0_SET_0" },
+  { 3841, "MPL_SWINT_0_SET_1" },
+  { 3842, "MPL_SWINT_0_SET_2" },
+  { 3843, "MPL_SWINT_0_SET_3" },
+  { 3844, "MPL_SWINT_0" },
+  { 4096, "MPL_ILL_TRANS_SET_0" },
+  { 4097, "MPL_ILL_TRANS_SET_1" },
+  { 4098, "MPL_ILL_TRANS_SET_2" },
+  { 4099, "MPL_ILL_TRANS_SET_3" },
+  { 4100, "MPL_ILL_TRANS" },
+  { 4101, "ILL_TRANS_REASON" },
+  { 4102, "ILL_VA_PC" },
+  { 4352, "MPL_UNALIGN_DATA_SET_0" },
+  { 4353, "MPL_UNALIGN_DATA_SET_1" },
+  { 4354, "MPL_UNALIGN_DATA_SET_2" },
+  { 4355, "MPL_UNALIGN_DATA_SET_3" },
+  { 4356, "MPL_UNALIGN_DATA" },
+  { 4608, "MPL_DTLB_MISS_SET_0" },
+  { 4609, "MPL_DTLB_MISS_SET_1" },
+  { 4610, "MPL_DTLB_MISS_SET_2" },
+  { 4611, "MPL_DTLB_MISS_SET_3" },
+  { 4612, "MPL_DTLB_MISS" },
+  { 4613, "DTLB_TSB_BASE_ADDR_0" },
+  { 4614, "DTLB_TSB_BASE_ADDR_1" },
+  { 4736, "AAR" },
+  { 4737, "CACHE_PINNED_WAYS" },
+  { 4738, "DTLB_BAD_ADDR" },
+  { 4739, "DTLB_BAD_ADDR_REASON" },
+  { 4740, "DTLB_CURRENT_ATTR" },
+  { 4741, "DTLB_CURRENT_PA" },
+  { 4742, "DTLB_CURRENT_VA" },
+  { 4743, "DTLB_INDEX" },
+  { 4744, "DTLB_MATCH_0" },
+  { 4745, "DTLB_PERF" },
+  { 4746, "DTLB_TSB_ADDR_0" },
+  { 4747, "DTLB_TSB_ADDR_1" },
+  { 4748, "DTLB_TSB_FILL_CURRENT_ATTR" },
+  { 4749, "DTLB_TSB_FILL_MATCH" },
+  { 4750, "NUMBER_DTLB" },
+  { 4751, "REPLACEMENT_DTLB" },
+  { 4752, "WIRED_DTLB" },
+  { 4864, "MPL_DTLB_ACCESS_SET_0" },
+  { 4865, "MPL_DTLB_ACCESS_SET_1" },
+  { 4866, "MPL_DTLB_ACCESS_SET_2" },
+  { 4867, "MPL_DTLB_ACCESS_SET_3" },
+  { 4868, "MPL_DTLB_ACCESS" },
+  { 5120, "MPL_IDN_FIREWALL_SET_0" },
+  { 5121, "MPL_IDN_FIREWALL_SET_1" },
+  { 5122, "MPL_IDN_FIREWALL_SET_2" },
+  { 5123, "MPL_IDN_FIREWALL_SET_3" },
+  { 5124, "MPL_IDN_FIREWALL" },
+  { 5125, "IDN_DIRECTION_PROTECT" },
+  { 5376, "MPL_UDN_FIREWALL_SET_0" },
+  { 5377, "MPL_UDN_FIREWALL_SET_1" },
+  { 5378, "MPL_UDN_FIREWALL_SET_2" },
+  { 5379, "MPL_UDN_FIREWALL_SET_3" },
+  { 5380, "MPL_UDN_FIREWALL" },
+  { 5381, "UDN_DIRECTION_PROTECT" },
+  { 5632, "MPL_TILE_TIMER_SET_0" },
+  { 5633, "MPL_TILE_TIMER_SET_1" },
+  { 5634, "MPL_TILE_TIMER_SET_2" },
+  { 5635, "MPL_TILE_TIMER_SET_3" },
+  { 5636, "MPL_TILE_TIMER" },
+  { 5637, "TILE_TIMER_CONTROL" },
+  { 5888, "MPL_AUX_TILE_TIMER_SET_0" },
+  { 5889, "MPL_AUX_TILE_TIMER_SET_1" },
+  { 5890, "MPL_AUX_TILE_TIMER_SET_2" },
+  { 5891, "MPL_AUX_TILE_TIMER_SET_3" },
+  { 5892, "MPL_AUX_TILE_TIMER" },
+  { 5893, "AUX_TILE_TIMER_CONTROL" },
+  { 6144, "MPL_IDN_TIMER_SET_0" },
+  { 6145, "MPL_IDN_TIMER_SET_1" },
+  { 6146, "MPL_IDN_TIMER_SET_2" },
+  { 6147, "MPL_IDN_TIMER_SET_3" },
+  { 6148, "MPL_IDN_TIMER" },
+  { 6149, "IDN_DEADLOCK_COUNT" },
+  { 6150, "IDN_DEADLOCK_TIMEOUT" },
+  { 6400, "MPL_UDN_TIMER_SET_0" },
+  { 6401, "MPL_UDN_TIMER_SET_1" },
+  { 6402, "MPL_UDN_TIMER_SET_2" },
+  { 6403, "MPL_UDN_TIMER_SET_3" },
+  { 6404, "MPL_UDN_TIMER" },
+  { 6405, "UDN_DEADLOCK_COUNT" },
+  { 6406, "UDN_DEADLOCK_TIMEOUT" },
+  { 6656, "MPL_IDN_AVAIL_SET_0" },
+  { 6657, "MPL_IDN_AVAIL_SET_1" },
+  { 6658, "MPL_IDN_AVAIL_SET_2" },
+  { 6659, "MPL_IDN_AVAIL_SET_3" },
+  { 6660, "MPL_IDN_AVAIL" },
+  { 6661, "IDN_AVAIL_EN" },
+  { 6912, "MPL_UDN_AVAIL_SET_0" },
+  { 6913, "MPL_UDN_AVAIL_SET_1" },
+  { 6914, "MPL_UDN_AVAIL_SET_2" },
+  { 6915, "MPL_UDN_AVAIL_SET_3" },
+  { 6916, "MPL_UDN_AVAIL" },
+  { 6917, "UDN_AVAIL_EN" },
+  { 7168, "MPL_IPI_3_SET_0" },
+  { 7169, "MPL_IPI_3_SET_1" },
+  { 7170, "MPL_IPI_3_SET_2" },
+  { 7171, "MPL_IPI_3_SET_3" },
+  { 7172, "MPL_IPI_3" },
+  { 7173, "IPI_EVENT_3" },
+  { 7174, "IPI_EVENT_RESET_3" },
+  { 7175, "IPI_EVENT_SET_3" },
+  { 7176, "IPI_MASK_3" },
+  { 7177, "IPI_MASK_RESET_3" },
+  { 7178, "IPI_MASK_SET_3" },
+  { 7424, "MPL_IPI_2_SET_0" },
+  { 7425, "MPL_IPI_2_SET_1" },
+  { 7426, "MPL_IPI_2_SET_2" },
+  { 7427, "MPL_IPI_2_SET_3" },
+  { 7428, "MPL_IPI_2" },
+  { 7429, "IPI_EVENT_2" },
+  { 7430, "IPI_EVENT_RESET_2" },
+  { 7431, "IPI_EVENT_SET_2" },
+  { 7432, "IPI_MASK_2" },
+  { 7433, "IPI_MASK_RESET_2" },
+  { 7434, "IPI_MASK_SET_2" },
+  { 7680, "MPL_IPI_1_SET_0" },
+  { 7681, "MPL_IPI_1_SET_1" },
+  { 7682, "MPL_IPI_1_SET_2" },
+  { 7683, "MPL_IPI_1_SET_3" },
+  { 7684, "MPL_IPI_1" },
+  { 7685, "IPI_EVENT_1" },
+  { 7686, "IPI_EVENT_RESET_1" },
+  { 7687, "IPI_EVENT_SET_1" },
+  { 7688, "IPI_MASK_1" },
+  { 7689, "IPI_MASK_RESET_1" },
+  { 7690, "IPI_MASK_SET_1" },
+  { 7936, "MPL_IPI_0_SET_0" },
+  { 7937, "MPL_IPI_0_SET_1" },
+  { 7938, "MPL_IPI_0_SET_2" },
+  { 7939, "MPL_IPI_0_SET_3" },
+  { 7940, "MPL_IPI_0" },
+  { 7941, "IPI_EVENT_0" },
+  { 7942, "IPI_EVENT_RESET_0" },
+  { 7943, "IPI_EVENT_SET_0" },
+  { 7944, "IPI_MASK_0" },
+  { 7945, "IPI_MASK_RESET_0" },
+  { 7946, "IPI_MASK_SET_0" },
+  { 8192, "MPL_PERF_COUNT_SET_0" },
+  { 8193, "MPL_PERF_COUNT_SET_1" },
+  { 8194, "MPL_PERF_COUNT_SET_2" },
+  { 8195, "MPL_PERF_COUNT_SET_3" },
+  { 8196, "MPL_PERF_COUNT" },
+  { 8197, "PERF_COUNT_0" },
+  { 8198, "PERF_COUNT_1" },
+  { 8199, "PERF_COUNT_CTL" },
+  { 8200, "PERF_COUNT_DN_CTL" },
+  { 8201, "PERF_COUNT_STS" },
+  { 8202, "WATCH_MASK" },
+  { 8203, "WATCH_VAL" },
+  { 8448, "MPL_AUX_PERF_COUNT_SET_0" },
+  { 8449, "MPL_AUX_PERF_COUNT_SET_1" },
+  { 8450, "MPL_AUX_PERF_COUNT_SET_2" },
+  { 8451, "MPL_AUX_PERF_COUNT_SET_3" },
+  { 8452, "MPL_AUX_PERF_COUNT" },
+  { 8453, "AUX_PERF_COUNT_0" },
+  { 8454, "AUX_PERF_COUNT_1" },
+  { 8455, "AUX_PERF_COUNT_CTL" },
+  { 8456, "AUX_PERF_COUNT_STS" },
+  { 8704, "MPL_INTCTRL_3_SET_0" },
+  { 8705, "MPL_INTCTRL_3_SET_1" },
+  { 8706, "MPL_INTCTRL_3_SET_2" },
+  { 8707, "MPL_INTCTRL_3_SET_3" },
+  { 8708, "MPL_INTCTRL_3" },
+  { 8709, "INTCTRL_3_STATUS" },
+  { 8710, "INTERRUPT_MASK_3" },
+  { 8711, "INTERRUPT_MASK_RESET_3" },
+  { 8712, "INTERRUPT_MASK_SET_3" },
+  { 8713, "INTERRUPT_VECTOR_BASE_3" },
+  { 8714, "SINGLE_STEP_EN_0_3" },
+  { 8715, "SINGLE_STEP_EN_1_3" },
+  { 8716, "SINGLE_STEP_EN_2_3" },
+  { 8717, "SINGLE_STEP_EN_3_3" },
+  { 8832, "EX_CONTEXT_3_0" },
+  { 8833, "EX_CONTEXT_3_1" },
+  { 8834, "SYSTEM_SAVE_3_0" },
+  { 8835, "SYSTEM_SAVE_3_1" },
+  { 8836, "SYSTEM_SAVE_3_2" },
+  { 8837, "SYSTEM_SAVE_3_3" },
+  { 8960, "MPL_INTCTRL_2_SET_0" },
+  { 8961, "MPL_INTCTRL_2_SET_1" },
+  { 8962, "MPL_INTCTRL_2_SET_2" },
+  { 8963, "MPL_INTCTRL_2_SET_3" },
+  { 8964, "MPL_INTCTRL_2" },
+  { 8965, "INTCTRL_2_STATUS" },
+  { 8966, "INTERRUPT_MASK_2" },
+  { 8967, "INTERRUPT_MASK_RESET_2" },
+  { 8968, "INTERRUPT_MASK_SET_2" },
+  { 8969, "INTERRUPT_VECTOR_BASE_2" },
+  { 8970, "SINGLE_STEP_EN_0_2" },
+  { 8971, "SINGLE_STEP_EN_1_2" },
+  { 8972, "SINGLE_STEP_EN_2_2" },
+  { 8973, "SINGLE_STEP_EN_3_2" },
+  { 9088, "EX_CONTEXT_2_0" },
+  { 9089, "EX_CONTEXT_2_1" },
+  { 9090, "SYSTEM_SAVE_2_0" },
+  { 9091, "SYSTEM_SAVE_2_1" },
+  { 9092, "SYSTEM_SAVE_2_2" },
+  { 9093, "SYSTEM_SAVE_2_3" },
+  { 9216, "MPL_INTCTRL_1_SET_0" },
+  { 9217, "MPL_INTCTRL_1_SET_1" },
+  { 9218, "MPL_INTCTRL_1_SET_2" },
+  { 9219, "MPL_INTCTRL_1_SET_3" },
+  { 9220, "MPL_INTCTRL_1" },
+  { 9221, "INTCTRL_1_STATUS" },
+  { 9222, "INTERRUPT_MASK_1" },
+  { 9223, "INTERRUPT_MASK_RESET_1" },
+  { 9224, "INTERRUPT_MASK_SET_1" },
+  { 9225, "INTERRUPT_VECTOR_BASE_1" },
+  { 9226, "SINGLE_STEP_EN_0_1" },
+  { 9227, "SINGLE_STEP_EN_1_1" },
+  { 9228, "SINGLE_STEP_EN_2_1" },
+  { 9229, "SINGLE_STEP_EN_3_1" },
+  { 9344, "EX_CONTEXT_1_0" },
+  { 9345, "EX_CONTEXT_1_1" },
+  { 9346, "SYSTEM_SAVE_1_0" },
+  { 9347, "SYSTEM_SAVE_1_1" },
+  { 9348, "SYSTEM_SAVE_1_2" },
+  { 9349, "SYSTEM_SAVE_1_3" },
+  { 9472, "MPL_INTCTRL_0_SET_0" },
+  { 9473, "MPL_INTCTRL_0_SET_1" },
+  { 9474, "MPL_INTCTRL_0_SET_2" },
+  { 9475, "MPL_INTCTRL_0_SET_3" },
+  { 9476, "MPL_INTCTRL_0" },
+  { 9477, "INTCTRL_0_STATUS" },
+  { 9478, "INTERRUPT_MASK_0" },
+  { 9479, "INTERRUPT_MASK_RESET_0" },
+  { 9480, "INTERRUPT_MASK_SET_0" },
+  { 9481, "INTERRUPT_VECTOR_BASE_0" },
+  { 9482, "SINGLE_STEP_EN_0_0" },
+  { 9483, "SINGLE_STEP_EN_1_0" },
+  { 9484, "SINGLE_STEP_EN_2_0" },
+  { 9485, "SINGLE_STEP_EN_3_0" },
+  { 9600, "EX_CONTEXT_0_0" },
+  { 9601, "EX_CONTEXT_0_1" },
+  { 9602, "SYSTEM_SAVE_0_0" },
+  { 9603, "SYSTEM_SAVE_0_1" },
+  { 9604, "SYSTEM_SAVE_0_2" },
+  { 9605, "SYSTEM_SAVE_0_3" },
+  { 9728, "MPL_BOOT_ACCESS_SET_0" },
+  { 9729, "MPL_BOOT_ACCESS_SET_1" },
+  { 9730, "MPL_BOOT_ACCESS_SET_2" },
+  { 9731, "MPL_BOOT_ACCESS_SET_3" },
+  { 9732, "MPL_BOOT_ACCESS" },
+  { 9733, "BIG_ENDIAN_CONFIG" },
+  { 9734, "CACHE_INVALIDATION_COMPRESSION_MODE" },
+  { 9735, "CACHE_INVALIDATION_MASK_0" },
+  { 9736, "CACHE_INVALIDATION_MASK_1" },
+  { 9737, "CACHE_INVALIDATION_MASK_2" },
+  { 9738, "CBOX_CACHEASRAM_CONFIG" },
+  { 9739, "CBOX_CACHE_CONFIG" },
+  { 9740, "CBOX_HOME_MAP_ADDR" },
+  { 9741, "CBOX_HOME_MAP_DATA" },
+  { 9742, "CBOX_MMAP_0" },
+  { 9743, "CBOX_MMAP_1" },
+  { 9744, "CBOX_MMAP_2" },
+  { 9745, "CBOX_MMAP_3" },
+  { 9746, "CBOX_MSR" },
+  { 9747, "DIAG_BCST_CTL" },
+  { 9748, "DIAG_BCST_MASK" },
+  { 9749, "DIAG_BCST_TRIGGER" },
+  { 9750, "DIAG_MUX_CTL" },
+  { 9751, "DIAG_TRACE_CTL" },
+  { 9752, "DIAG_TRACE_DATA" },
+  { 9753, "DIAG_TRACE_STS" },
+  { 9754, "IDN_DEMUX_BUF_THRESH" },
+  { 9755, "L1_I_PIN_WAY_0" },
+  { 9756, "MEM_ROUTE_ORDER" },
+  { 9757, "MEM_STRIPE_CONFIG" },
+  { 9758, "PERF_COUNT_PLS" },
+  { 9759, "PSEUDO_RANDOM_NUMBER_MODIFY" },
+  { 9760, "QUIESCE_CTL" },
+  { 9761, "RSHIM_COORD" },
+  { 9762, "SBOX_CONFIG" },
+  { 9763, "UDN_DEMUX_BUF_THRESH" },
+  { 9764, "XDN_CORE_STARVATION_COUNT" },
+  { 9765, "XDN_ROUND_ROBIN_ARB_CTL" },
+  { 9856, "CYCLE_MODIFY" },
+  { 9857, "I_AAR" },
+  { 9984, "MPL_WORLD_ACCESS_SET_0" },
+  { 9985, "MPL_WORLD_ACCESS_SET_1" },
+  { 9986, "MPL_WORLD_ACCESS_SET_2" },
+  { 9987, "MPL_WORLD_ACCESS_SET_3" },
+  { 9988, "MPL_WORLD_ACCESS" },
+  { 9989, "DONE" },
+  { 9990, "DSTREAM_PF" },
+  { 9991, "FAIL" },
+  { 9992, "INTERRUPT_CRITICAL_SECTION" },
+  { 9993, "PASS" },
+  { 9994, "PSEUDO_RANDOM_NUMBER" },
+  { 9995, "TILE_COORD" },
+  { 9996, "TILE_RTF_HWM" },
+  { 10112, "CMPEXCH_VALUE" },
+  { 10113, "CYCLE" },
+  { 10114, "EVENT_BEGIN" },
+  { 10115, "EVENT_END" },
+  { 10116, "PROC_STATUS" },
+  { 10117, "SIM_CONTROL" },
+  { 10118, "SIM_SOCKET" },
+  { 10119, "STATUS_SATURATE" },
+  { 10240, "MPL_I_ASID_SET_0" },
+  { 10241, "MPL_I_ASID_SET_1" },
+  { 10242, "MPL_I_ASID_SET_2" },
+  { 10243, "MPL_I_ASID_SET_3" },
+  { 10244, "MPL_I_ASID" },
+  { 10245, "I_ASID" },
+  { 10496, "MPL_D_ASID_SET_0" },
+  { 10497, "MPL_D_ASID_SET_1" },
+  { 10498, "MPL_D_ASID_SET_2" },
+  { 10499, "MPL_D_ASID_SET_3" },
+  { 10500, "MPL_D_ASID" },
+  { 10501, "D_ASID" },
+  { 10752, "MPL_DOUBLE_FAULT_SET_0" },
+  { 10753, "MPL_DOUBLE_FAULT_SET_1" },
+  { 10754, "MPL_DOUBLE_FAULT_SET_2" },
+  { 10755, "MPL_DOUBLE_FAULT_SET_3" },
+  { 10756, "MPL_DOUBLE_FAULT" },
+  { 10757, "LAST_INTERRUPT_REASON" },
+};
+
+/* Number of entries in tilegx_sprs[].  Derived from the array itself so
+   the count cannot drift out of sync when entries are added or removed
+   (a stale count would make bsearch() miss entries or read past the end). */
+const int tilegx_num_sprs =
+  (int)(sizeof(tilegx_sprs) / sizeof(tilegx_sprs[0]));
+
+/* Look up the symbolic name of special-purpose register `num`.
+ * Binary-searches tilegx_sprs[] (tilegx_num_sprs entries) by number.
+ * Returns the name string, or NULL when `num` is not in the table.
+ */
+const char *
+get_tilegx_spr_name (int num)
+{
+  struct tilegx_spr wanted;
+  const struct tilegx_spr *hit;
+
+  /* Only the number is compared, so the key's name is left unset. */
+  wanted.number = num;
+
+  hit = (const struct tilegx_spr *)
+    bsearch((const void *) &wanted, (const void *) tilegx_sprs,
+            tilegx_num_sprs, sizeof (struct tilegx_spr),
+            tilegx_spr_compare);
+
+  return (hit != NULL) ? hit->name : NULL;
+}
+
+/* Disassemble the bundle stored at `memaddr` and print it to stdout in the
+ * form "{ insn op, op ; insn ... }" followed by a newline.
+ *
+ * The address of the buffer itself is used as the bundle's PC, so
+ * branch/jump targets are printed as addresses relative to `memaddr`.
+ * Padding nops are suppressed (see below).  Aborts on an operand of
+ * unknown type.
+ *
+ * Returns TILEGX_BUNDLE_SIZE_IN_BYTES, the number of bytes consumed.
+ */
+int
+print_insn_tilegx (unsigned char * memaddr)
+{
+  struct tilegx_decoded_instruction
+    decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
+  unsigned long long bundle = 0;
+  int i, num_instructions, num_printed;
+  tilegx_mnemonic padding_mnemonic;
+
+  /* Load the bundle with memcpy rather than by dereferencing a casted
+     pointer: reading an unsigned char buffer through an
+     "unsigned long long *" violates the aliasing rules and may be a
+     misaligned access. */
+  memcpy((void *)&bundle, (const void *)memaddr, sizeof bundle);
+
+  /* Parse the instructions in the bundle. */
+  num_instructions =
+    parse_insn_tilegx (bundle, (unsigned long long)memaddr, decoded);
+
+  /* Print the instructions in the bundle. */
+  printf("{ ");
+  num_printed = 0;
+
+  /* Determine which nop opcode is used for padding and should be skipped. */
+  padding_mnemonic = TILEGX_OPC_FNOP;
+  for (i = 0; i < num_instructions; i++)
+  {
+    if (!decoded[i].opcode->can_bundle)
+    {
+      /* Instructions that cannot be bundled are padded out with nops,
+         rather than fnops. Displaying them is always clutter. */
+      padding_mnemonic = TILEGX_OPC_NOP;
+      break;
+    }
+  }
+
+  for (i = 0; i < num_instructions; i++)
+  {
+    const struct tilegx_opcode *opcode = decoded[i].opcode;
+    const char *name;
+    int j;
+
+    /* Do not print out padding nops, unless everything is padding, in
+       which case we will print out just the last one. */
+    if (opcode->mnemonic == padding_mnemonic
+        && (num_printed > 0 || i + 1 < num_instructions))
+      continue;
+
+    if (num_printed > 0)
+      printf(" ; ");
+    ++num_printed;
+
+    /* Undecodable slots have no name in the opcode table. */
+    name = opcode->name;
+    if (name == NULL)
+      name = "<invalid>";
+    printf("%s", name);
+
+    for (j = 0; j < opcode->num_operands; j++)
+    {
+      unsigned long long num;
+      const struct tilegx_operand *op;
+      const char *spr_name;
+
+      if (j > 0)
+        printf (",");
+      printf (" ");
+
+      num = decoded[i].operand_values[j];
+
+      op = decoded[i].operands[j];
+      switch (op->type)
+      {
+      case TILEGX_OP_TYPE_REGISTER:
+        printf ("%s", tilegx_register_names[(int)num]);
+        break;
+      case TILEGX_OP_TYPE_SPR:
+        /* Fall back to the raw number for SPRs without a known name. */
+        spr_name = get_tilegx_spr_name((int)num);
+        if (spr_name != NULL)
+          printf ("%s", spr_name);
+        else
+          printf ("%d", (int)num);
+        break;
+      case TILEGX_OP_TYPE_IMMEDIATE:
+        printf ("%d", (int)num);
+        break;
+      case TILEGX_OP_TYPE_ADDRESS:
+        printf ("0x%016llx", num);
+        break;
+      default:
+        abort ();
+      }
+    }
+  }
+  printf (" }\n");
+
+  return TILEGX_BUNDLE_SIZE_IN_BYTES;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c b/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c
new file mode 100644 (file)
index 0000000..d0b392e
--- /dev/null
@@ -0,0 +1,2580 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* TileGX architecture. */
+/* Contributed by Tilera Corporation. */
+#include "sljitNativeTILEGX-encoder.c"
+
+/* Inclusive bounds of the signed immediate ranges.
+   The minimums have to be negative values of a signed type. Writing
+   -0x80000000 negates a constant that does not fit in int (it is an
+   unsigned int where int is 32 bits wide), which yields a positive value,
+   so the 32-bit minimum is spelled (-0x7fffffff - 1). The 48-bit minimum
+   carries an explicit minus sign for the same reason. */
+#define SIMM_8BIT_MAX (0x7f)
+#define SIMM_8BIT_MIN (-0x80)
+#define SIMM_16BIT_MAX (0x7fff)
+#define SIMM_16BIT_MIN (-0x8000)
+#define SIMM_17BIT_MAX (0xffff)
+#define SIMM_17BIT_MIN (-0x10000)
+#define SIMM_32BIT_MIN (-0x7fffffff - 1)
+#define SIMM_32BIT_MAX (0x7fffffff)
+#define SIMM_48BIT_MIN (-0x800000000000L)
+/* NOTE(review): the low 16 bits of this maximum are zero (it is not
+   0x7fffffffffff); the value is kept unchanged - confirm against its users. */
+#define SIMM_48BIT_MAX (0x7fffffff0000L)
+/* Low 16 bits of an immediate. */
+#define IMM16(imm) ((imm) & 0xffff)
+
+#define UIMM_16BIT_MAX (0xffff)
+
+#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
+#define ADDR_TMP (SLJIT_NO_REGISTERS + 4)
+#define PIC_ADDR_REG TMP_REG2
+
+/* Machine register number for each register index used by this backend.
+   Entry 0 is 63 (the value of ZERO). The 54, 5, 16, 6 and 7 entries are
+   the same numbers as the SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped,
+   TMP_REG2_mapped, TMP_REG3_mapped and ADDR_TMP_mapped macros.
+   NOTE(review): presumably indexed by SLJIT register number, with the
+   temporaries at SLJIT_NO_REGISTERS + 1 .. + 4 - confirm at the use sites. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
+       63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
+};
+
+#define SLJIT_LOCALS_REG_mapped 54
+#define TMP_REG1_mapped 5
+#define TMP_REG2_mapped 16
+#define TMP_REG3_mapped 6
+#define ADDR_TMP_mapped 7
+#define SLJIT_SAVED_REG1_mapped 30
+#define SLJIT_SAVED_REG2_mapped 31
+#define SLJIT_SAVED_REG3_mapped 32
+#define SLJIT_SAVED_EREG1_mapped 33
+#define SLJIT_SAVED_EREG2_mapped 34
+
+/* Flags are kept in volatile registers. */
+#define EQUAL_FLAG 8
+/* And carry flag as well. */
+#define ULESS_FLAG 9
+#define UGREATER_FLAG 10
+#define LESS_FLAG 11
+#define GREATER_FLAG 12
+#define OVERFLOW_FLAG 13
+
+#define ZERO 63
+#define RA 55
+#define TMP_EREG1 14
+#define TMP_EREG2 15
+
+#define LOAD_DATA 0x01
+#define WORD_DATA 0x00
+#define BYTE_DATA 0x02
+#define HALF_DATA 0x04
+#define INT_DATA 0x06
+#define SIGNED_DATA 0x08
+#define DOUBLE_DATA 0x10
+
+/* Separates integer and floating point registers */
+#define GPR_REG 0xf
+
+#define MEM_MASK 0x1f
+
+#define WRITE_BACK 0x00020
+#define ARG_TEST 0x00040
+#define ALT_KEEP_CACHE 0x00080
+#define CUMULATIVE_OP 0x00100
+#define LOGICAL_OP 0x00200
+#define IMM_OP 0x00400
+#define SRC2_IMM 0x00800
+
+#define UNUSED_DEST 0x01000
+#define REG_DEST 0x02000
+#define REG1_SOURCE 0x04000
+#define REG2_SOURCE 0x08000
+#define SLOW_SRC1 0x10000
+#define SLOW_SRC2 0x20000
+#define SLOW_DEST 0x40000
+
+/* Only these flags are set. UNUSED_DEST is not set when no flags should be set.
+ */
+#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
+
+/* Returns the platform name string: "TileGX" followed by the text of
+   SLJIT_CPUINFO (joined by string literal concatenation). */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void)
+{
+       return "TileGX" SLJIT_CPUINFO;
+}
+
+/* Length of an instruction word */
+typedef sljit_uw sljit_ins;
+
+/* One instruction waiting in inst_buf to be packed into a bundle. */
+struct jit_instr {
+       /* Entry of tilegx_opcodes[] describing the instruction. */
+       const struct tilegx_opcode* opcode; 
+       /* Pipeline the instruction is currently assigned to. */
+       tilegx_pipeline pipe;
+       /* Bit masks of the registers read / written by the instruction:
+          bit n stands for register n (filled in by the push_*_buffer
+          helpers, compared in assign_pipes). */
+       unsigned long input_registers;
+       unsigned long output_registers;
+       /* Operand values; entry i is encoded through operand i of the
+          opcode in get_bundle_bit. */
+       int operand_value[4];
+       /* Source line of the emitting call; printed only when
+          TILEGX_JIT_DEBUG is defined. */
+       int line;
+};
+
+/* Opcode Helper Macros */
+#define TILEGX_X_MODE 0
+
+#define X_MODE create_Mode(TILEGX_X_MODE)
+
+#define FNOP_X0 \
+       create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
+       create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0)
+
+#define FNOP_X1 \
+       create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1)
+
+#define NOP \
+       create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1
+
+#define ANOP_X0 \
+       create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
+       create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0)
+
+#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
+       create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0
+
+#define ADD_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define ADDI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define SUB_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define NOR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define OR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define AND_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define XOR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define CMOVNEZ_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1
+
+#define CMOVEQZ_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1
+
+#define ADDLI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0
+
+#define V4INT_L_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define BFEXTU_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
+       create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1
+
+#define BFEXTS_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
+       create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1
+
+#define SHL16INSLI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0
+
+#define ST_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0
+
+#define LD_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0
+
+#define JR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0
+
+#define JALR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0
+
+#define CLZ_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
+       create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1
+
+#define CMPLTUI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define CMPLTU_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define CMPLTS_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define XORI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define ORI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define ANDI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define SHLI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
+       create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0
+
+#define SHL_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define SHRSI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
+       create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0
+
+#define SHRS_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define SHRUI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
+       create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0
+
+#define SHRU_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define BEQZ_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
+       create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0
+
+#define BNEZ_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
+       create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0
+
+#define J_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
+       create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0
+
+#define JAL_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
+       create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0
+
+#define DEST_X0(x) create_Dest_X0(x)
+#define SRCA_X0(x) create_SrcA_X0(x)
+#define SRCB_X0(x) create_SrcB_X0(x)
+#define DEST_X1(x) create_Dest_X1(x)
+#define SRCA_X1(x) create_SrcA_X1(x)
+#define SRCB_X1(x) create_SrcB_X1(x)
+#define IMM16_X1(x) create_Imm16_X1(x)
+#define IMM8_X1(x) create_Imm8_X1(x)
+#define BFSTART_X0(x) create_BFStart_X0(x)
+#define BFEND_X0(x) create_BFEnd_X0(x)
+#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
+#define JOFF_X1(x) create_JumpOff_X1(x)
+#define BOFF_X1(x) create_BrOff_X1(x)
+
+/* Load/store mnemonics for every combination of signedness (u/s), width
+   (w = word, b = byte, h = half, i = int) and direction (s = store,
+   l = load). The row order matches the flag values LOAD_DATA (0x01),
+   BYTE_DATA / HALF_DATA / INT_DATA (0x02 / 0x04 / 0x06, WORD_DATA being
+   0x00) and SIGNED_DATA (0x08). Stores are identical in the unsigned and
+   signed halves; only the loads differ.
+   NOTE(review): presumably indexed with those flag bits - confirm at the
+   use site. */
+static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = {
+       /* u w s */ TILEGX_OPC_ST   /* st */,
+       /* u w l */ TILEGX_OPC_LD   /* ld */,
+       /* u b s */ TILEGX_OPC_ST1  /* st1 */,
+       /* u b l */ TILEGX_OPC_LD1U /* ld1u */,
+       /* u h s */ TILEGX_OPC_ST2  /* st2 */,
+       /* u h l */ TILEGX_OPC_LD2U /* ld2u */,
+       /* u i s */ TILEGX_OPC_ST4  /* st4 */,
+       /* u i l */ TILEGX_OPC_LD4U /* ld4u */,
+       /* s w s */ TILEGX_OPC_ST   /* st */,
+       /* s w l */ TILEGX_OPC_LD   /* ld */,
+       /* s b s */ TILEGX_OPC_ST1  /* st1 */,
+       /* s b l */ TILEGX_OPC_LD1S /* ld1s */,
+       /* s h s */ TILEGX_OPC_ST2  /* st2 */,
+       /* s h l */ TILEGX_OPC_LD2S /* ld2s */,
+       /* s i s */ TILEGX_OPC_ST4  /* st4 */,
+       /* s i l */ TILEGX_OPC_LD4S /* ld4s */,
+};
+
+#ifdef TILEGX_JIT_DEBUG
+/* Debug build: appends one encoded bundle to the compiler buffer and
+   prints it, tagged with the source line of the emitting call. */
+static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
+{
+       sljit_ins *slot;
+
+       slot = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(slot == NULL);
+
+       slot[0] = ins;
+       compiler->size += 1;
+
+       printf("|%04d|S0|:\t\t", line);
+       print_insn_tilegx(slot);
+       return SLJIT_SUCCESS;
+}
+
+/* Debug build: appends one encoded bundle without printing anything. */
+static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *slot;
+
+       slot = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(slot == NULL);
+
+       slot[0] = ins;
+       compiler->size += 1;
+       return SLJIT_SUCCESS;
+}
+
+#define push_inst(a, b) push_inst_debug(a, b, __LINE__)
+#else
+/* Appends one encoded bundle to the compiler buffer and counts it in
+   compiler->size. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *slot;
+
+       slot = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(slot == NULL);
+
+       slot[0] = ins;
+       compiler->size += 1;
+       return SLJIT_SUCCESS;
+}
+#endif
+
+#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
+       ((p0) | ((p1) << 8) | ((p2) << 16))
+
+#define BUNDLE_FORMAT(p0, p1, p2) \
+       { \
+               { \
+                       (tilegx_pipeline)(p0), \
+                       (tilegx_pipeline)(p1), \
+                       (tilegx_pipeline)(p2) \
+               }, \
+               BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
+       }
+
+#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS
+
+#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)
+
+#define PI(encoding) \
+       push_inst(compiler, encoding)
+
+#define PB3(opcode, dst, srca, srcb) \
+       push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)
+
+#define PB2(opcode, dst, src) \
+       push_2_buffer(compiler, opcode, dst, src, __LINE__)
+
+#define JR(reg) \
+       push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)
+
+#define ADD(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__)
+
+#define SUB(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)
+
+#define NOR(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)
+
+#define OR(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__)
+
+#define XOR(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__)
+
+#define AND(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__)
+
+#define CLZ(dst, src) \
+       push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__)
+
+#define SHLI(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__)
+
+#define SHRUI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__)
+
+#define XORI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__)
+
+#define ORI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__)
+
+#define CMPLTU(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__)
+
+#define CMPLTS(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__)
+
+#define CMPLTUI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__)
+
+#define CMOVNEZ(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__)
+
+#define CMOVEQZ(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__)
+
+#define ADDLI(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__)
+
+#define SHL16INSLI(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__)
+
+#define LD_ADD(dst, addr, adjust) \
+       push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__)
+
+#define ST_ADD(src, addr, adjust) \
+       push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__)
+
+#define LD(dst, addr) \
+       push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__)
+
+#define BFEXTU(dst, src, start, end) \
+       push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)
+
+#define BFEXTS(dst, src, start, end) \
+       push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)
+
+#define ADD_SOLO(dest, srca, srcb) \
+       push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))
+
+#define ADDI_SOLO(dest, srca, imm) \
+       push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))
+
+#define ADDLI_SOLO(dest, srca, imm) \
+       push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))
+
+#define SHL16INSLI_SOLO(dest, srca, imm) \
+       push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))
+
+#define JALR_SOLO(reg) \
+       push_inst(compiler, JALR_X1 | SRCA_X1(reg))
+
+#define JR_SOLO(reg) \
+       push_inst(compiler, JR_X1 | SRCA_X1(reg))
+
+/* One legal way of distributing the instructions of a bundle over the
+   pipelines. */
+struct Format {
+       /* Mapping of bundle issue slot to assigned pipe. */
+       tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
+
+       /* Mask of pipes used by this bundle, as built by BUNDLE_FORMAT_MASK:
+          the pipe bit of slot 0 sits in bits 0-7, slot 1 in bits 8-15 and
+          slot 2 in bits 16-23. */
+       unsigned int pipe_mask;
+};
+
+/* Table of the bundle formats tried by compute_format. The table is
+   scanned from the top and the first compatible entry wins, so the order
+   of the entries is significant. NO_PIPELINE marks an unused third slot. */
+const struct Format formats[] =
+{
+       /* In Y format we must always have something in Y2, since it has
+       * no fnop, so this conveys that Y2 must always be used. */
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),
+
+       /* Y format has three instructions. */
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),
+
+       /* X format has only two instructions. */
+       BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
+};
+
+
+/* Instructions queued for the bundle currently being assembled, and the
+   number of entries of inst_buf in use. Both are file-level objects shared
+   by every function below. */
+struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
+unsigned long inst_buf_index;
+
+/* Returns the lowest-numbered pipeline whose bit is set in opcode->pipes.
+   When no bit below TILEGX_NUM_PIPELINE_ENCODINGS is set, that bound
+   itself is returned. */
+tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
+{
+       /* FIXME: tile: we could pregenerate this. */
+       int candidate = 0;
+
+       while ((opcode->pipes & (1 << candidate)) == 0
+                       && candidate < TILEGX_NUM_PIPELINE_ENCODINGS)
+               candidate++;
+
+       return (tilegx_pipeline)(candidate);
+}
+
+/* Puts the no-operand instruction `opc` at the front of inst_buf, moving
+   the inst_buf_index queued entries up by one position. `line` is stored
+   as the source line of the new entry. The caller has to make sure there
+   is room for one more entry. */
+void insert_nop(tilegx_mnemonic opc, int line)
+{
+       struct jit_instr *front = &inst_buf[0];
+       const struct tilegx_opcode* nop_opcode = &tilegx_opcodes[opc];
+
+       /* Make room in slot 0; source and destination overlap. */
+       memmove(&inst_buf[1], front, inst_buf_index * sizeof inst_buf[0]);
+
+       front->opcode = nop_opcode;
+       front->pipe = get_any_valid_pipe(nop_opcode);
+       front->input_registers = 0;
+       front->output_registers = 0;
+       front->line = line;
+
+       inst_buf_index++;
+}
+
+/* Finds the first entry of formats[] whose pipes can all be served by the
+   instructions queued in inst_buf (two or three of them).
+   Returns that entry, or NULL when no format fits. */
+const struct Format* compute_format()
+{
+       /* Per-slot sets of acceptable pipes; the third slot counts as
+          NO_PIPELINE unless three instructions are queued. */
+       unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
+               inst_buf[0].opcode->pipes,
+               inst_buf[1].opcode->pipes,
+               (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));
+       unsigned int idx;
+
+       for (idx = 0; idx < sizeof formats / sizeof formats[0]; idx++) {
+               const struct Format *candidate = &formats[idx];
+
+               /* Every pipe the format uses must be allowed in its slot. */
+               if ((candidate->pipe_mask & compatible_pipes) == candidate->pipe_mask)
+                       return candidate;
+       }
+
+       return NULL;
+}
+
+/* Tries to turn the instructions queued in inst_buf into one legal bundle.
+   A lone instruction first gets a nop inserted in front of it, then a
+   bundle format is looked up and each instruction is given the pipe of its
+   slot. Returns 0 on success and -1 when no format fits or when a later
+   instruction reads or writes a register written by an earlier one.
+   The function modifies inst_buf and inst_buf_index even when it fails, so
+   a caller that wants to retry has to restore them. */
+sljit_si assign_pipes()
+{
+       unsigned long output_registers = 0;
+       unsigned int i = 0;
+
+       /* A single instruction is padded with fnop when it may share a
+          bundle, and with nop otherwise. */
+       if (inst_buf_index == 1) {
+               tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
+                                       ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
+               insert_nop(opc, __LINE__);
+       }
+
+       const struct Format* match = compute_format();
+
+       if (match == NULL)
+               return -1;
+
+       for (i = 0; i < inst_buf_index; i++) {
+
+               /* Reject the bundle if this instruction reads a register
+                  written by an earlier instruction of the bundle. */
+               if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
+                       return -1;
+
+               /* Likewise if it writes a register already written. */
+               if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
+                       return -1;
+
+               /* Don't include Rzero in the match set, to avoid triggering
+                  needlessly on 'prefetch' instrs. */
+
+               /* The mask drops the high register bits, register 63 (ZERO)
+                  among them. */
+               output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;
+
+               inst_buf[i].pipe = match->pipe[i];
+       }
+
+       /* If only 2 instrs, and in Y-mode, insert a nop. */
+       if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
+               insert_nop(TILEGX_OPC_FNOP, __LINE__);
+
+               /* Select the yet unassigned pipe. */
+               /* The two original instructions now sit in slots 1 and 2, so
+                  the sum of all three Y pipes minus their two pipes leaves
+                  the remaining one. */
+               tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
+                                       + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
+                                       - (inst_buf[1].pipe + inst_buf[2].pipe)));
+
+               inst_buf[0].pipe = pipe;
+       }
+
+       return 0;
+}
+
+/* Encodes one queued instruction for the pipe it has been assigned to:
+   the fixed opcode bits of that pipe OR-ed with every operand value after
+   it has been run through the operand's insert function. */
+tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
+{
+       const struct tilegx_opcode* desc = inst->opcode;
+       tilegx_bundle_bits encoded = desc->fixed_bit_values[inst->pipe];
+       int n;
+
+       for (n = 0; n < desc->num_operands; n++) {
+               /* The operand descriptor depends on the pipe in use. */
+               const struct tilegx_operand* field =
+                       &tilegx_operands[desc->operands[inst->pipe][n]];
+
+               encoded |= field->insert(inst->operand_value[n]);
+       }
+
+       return encoded;
+}
+
+/* Emits ONE bundle built from the front of inst_buf and leaves whatever
+   did not fit queued at the start of the buffer.
+   Three attempts are made, in this order: all queued instructions, all but
+   the last one, and the first one alone. Because assign_pipes modifies the
+   buffer (it may insert nops in the lower slots), the original contents are
+   saved on entry and restored before each retry.
+   Returns the result of push_inst / push_inst_nodebug for the emitted
+   bundle.
+   NOTE(review): when even the single-instruction attempt fails, control
+   reaches SLJIT_ASSERT_STOP() and then the end of this non-void function
+   without a return statement - confirm that SLJIT_ASSERT_STOP never
+   returns in every build configuration.
+   NOTE(review): `count` is declared but never used. */
+static sljit_si update_buffer(struct sljit_compiler *compiler)
+{
+       int count;
+       int i;
+       int orig_index = inst_buf_index;
+       struct jit_instr inst0 = inst_buf[0];
+       struct jit_instr inst1 = inst_buf[1];
+       struct jit_instr inst2 = inst_buf[2];
+       tilegx_bundle_bits bits = 0;
+
+       /* If the bundle is valid as is, perform the encoding, empty the
+          buffer and return the result of pushing the bundle. */
+       if (assign_pipes() == 0) {
+               for (i = 0; i < inst_buf_index; i++) {
+                       bits |= get_bundle_bit(inst_buf + i);
+#ifdef TILEGX_JIT_DEBUG
+                       printf("|%04d", inst_buf[i].line);
+#endif
+               }
+#ifdef TILEGX_JIT_DEBUG
+               if (inst_buf_index == 3)
+                       printf("|M0|:\t");
+               else
+                       printf("|M0|:\t\t");
+               print_insn_tilegx(&bits);
+#endif
+
+               inst_buf_index = 0;
+
+#ifdef TILEGX_JIT_DEBUG
+               return push_inst_nodebug(compiler, bits);
+#else
+               return push_inst(compiler, bits);
+#endif
+       }
+
+       /* If the bundle is invalid, split it in two. First encode the first two
+          (or possibly 1) instructions, and then the last, separately. Note that
+          assign_pipes may have re-ordered the instrs (by inserting no-ops in
+          lower slots) so we need to reset them. */
+
+       inst_buf_index = orig_index - 1;
+       inst_buf[0] = inst0;
+       inst_buf[1] = inst1;
+       inst_buf[2] = inst2;
+       if (assign_pipes() == 0) {
+               for (i = 0; i < inst_buf_index; i++) {
+                       bits |= get_bundle_bit(inst_buf + i);
+#ifdef TILEGX_JIT_DEBUG
+                       printf("|%04d", inst_buf[i].line);
+#endif
+               }
+
+#ifdef TILEGX_JIT_DEBUG
+               if (inst_buf_index == 3)
+                       printf("|M1|:\t");
+               else
+                       printf("|M1|:\t\t");
+               print_insn_tilegx(&bits);
+#endif
+
+               /* Requeue the instruction that was left out as the only
+                  entry of the buffer. */
+               if ((orig_index - 1) == 2) {
+                       inst_buf[0] = inst2;
+                       inst_buf_index = 1;
+               } else if ((orig_index - 1) == 1) {
+                       inst_buf[0] = inst1;
+                       inst_buf_index = 1;
+               } else
+                       SLJIT_ASSERT_STOP();
+
+#ifdef TILEGX_JIT_DEBUG
+               return push_inst_nodebug(compiler, bits);
+#else
+               return push_inst(compiler, bits);
+#endif
+       } else {
+               /* We had 3 instrs of which the first 2 can't live in the same bundle.
+                  Split those two. Note that we don't try to then combine the second
+                  and third instr into a single bundle.  First instruction: */
+               inst_buf_index = 1;
+               inst_buf[0] = inst0;
+               inst_buf[1] = inst1;
+               inst_buf[2] = inst2;
+               if (assign_pipes() == 0) {
+                       for (i = 0; i < inst_buf_index; i++) {
+                               bits |= get_bundle_bit(inst_buf + i);
+#ifdef TILEGX_JIT_DEBUG
+                               printf("|%04d", inst_buf[i].line);
+#endif
+                       }
+
+#ifdef TILEGX_JIT_DEBUG
+                       if (inst_buf_index == 3)
+                               printf("|M2|:\t");
+                       else
+                               printf("|M2|:\t\t");
+                       print_insn_tilegx(&bits);
+#endif
+
+                       /* Shift the two remaining instructions to the front
+                          of the buffer. */
+                       inst_buf[0] = inst1;
+                       inst_buf[1] = inst2;
+                       inst_buf_index = orig_index - 1;
+#ifdef TILEGX_JIT_DEBUG
+                       return push_inst_nodebug(compiler, bits);
+#else
+                       return push_inst(compiler, bits);
+#endif
+               } else
+                       SLJIT_ASSERT_STOP();
+       }
+
+       SLJIT_ASSERT_STOP();
+}
+
+/* Emits every instruction still queued in inst_buf, one bundle per
+   update_buffer call, until the buffer is empty.
+   Returns SLJIT_SUCCESS once the buffer is empty; leaves early through
+   FAIL_IF as soon as update_buffer reports a failure. */
+static sljit_si flush_buffer(struct sljit_compiler *compiler)
+{
+       while (inst_buf_index != 0) {
+               FAIL_IF(update_buffer(compiler));
+       }
+
+       /* An explicit result is required: push_jr_buffer returns the value
+          of this function to its own caller. */
+       return SLJIT_SUCCESS;
+}
+
+/* Queues a four-operand instruction (used by the BFEXTU / BFEXTS macros).
+   op0 is recorded as the register written and op1 as the register read;
+   op2 and op3 are stored as operand values only. If the buffer is already
+   full, one bundle is emitted first to make room.
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when emitting fails. */
+static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
+{
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       struct jit_instr *slot;
+
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       /* Taken after the possible flush, which changes inst_buf_index. */
+       slot = &inst_buf[inst_buf_index];
+       slot->opcode = opcode;
+       slot->pipe = get_any_valid_pipe(opcode);
+       slot->operand_value[0] = op0;
+       slot->operand_value[1] = op1;
+       slot->operand_value[2] = op2;
+       slot->operand_value[3] = op3;
+       /* Unsigned shifts: register 63 (ZERO) selects the top bit, which a
+          signed 1L cannot be shifted into without undefined behaviour. */
+       slot->input_registers = 1UL << op1;
+       slot->output_registers = 1UL << op0;
+       slot->line = line;
+       inst_buf_index++;
+
+       return SLJIT_SUCCESS;
+}
+
+/* Queues a three-operand instruction and records which registers it reads
+   and writes, so that assign_pipes can detect conflicts inside a bundle.
+   If the buffer is already full, one bundle is emitted first to make room.
+   An opcode that is not listed below is reported and hits
+   SLJIT_ASSERT_STOP(); its register masks are left as they were.
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when emitting fails. */
+static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
+{
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       struct jit_instr *slot;
+
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       /* Taken after the possible flush, which changes inst_buf_index. */
+       slot = &inst_buf[inst_buf_index];
+       slot->opcode = opcode;
+       slot->pipe = get_any_valid_pipe(opcode);
+       slot->operand_value[0] = op0;
+       slot->operand_value[1] = op1;
+       slot->operand_value[2] = op2;
+       slot->line = line;
+
+       /* The masks are built with unsigned shifts: register 63 (ZERO)
+          selects the top bit, which a signed 1L cannot be shifted into
+          without undefined behaviour. */
+       switch (opc) {
+       case TILEGX_OPC_ST_ADD:
+               /* Reads op0 and op1, writes op0; op2 is an immediate. */
+               slot->input_registers = (1UL << op0) | (1UL << op1);
+               slot->output_registers = 1UL << op0;
+               break;
+       case TILEGX_OPC_LD_ADD:
+               /* Reads op1, writes op0 and op1; op2 is an immediate. */
+               slot->input_registers = 1UL << op1;
+               slot->output_registers = (1UL << op0) | (1UL << op1);
+               break;
+       case TILEGX_OPC_ADD:
+       case TILEGX_OPC_AND:
+       case TILEGX_OPC_SUB:
+       case TILEGX_OPC_OR:
+       case TILEGX_OPC_XOR:
+       case TILEGX_OPC_NOR:
+       case TILEGX_OPC_SHL:
+       case TILEGX_OPC_SHRU:
+       case TILEGX_OPC_SHRS:
+       case TILEGX_OPC_CMPLTU:
+       case TILEGX_OPC_CMPLTS:
+       case TILEGX_OPC_CMOVEQZ:
+       case TILEGX_OPC_CMOVNEZ:
+               /* Register-register form: reads op1 and op2, writes op0. */
+               slot->input_registers = (1UL << op1) | (1UL << op2);
+               slot->output_registers = 1UL << op0;
+               break;
+       case TILEGX_OPC_ADDLI:
+       case TILEGX_OPC_XORI:
+       case TILEGX_OPC_ORI:
+       case TILEGX_OPC_SHLI:
+       case TILEGX_OPC_SHRUI:
+       case TILEGX_OPC_SHRSI:
+       case TILEGX_OPC_SHL16INSLI:
+       case TILEGX_OPC_CMPLTUI:
+       case TILEGX_OPC_CMPLTSI:
+               /* Register-immediate form: reads op1, writes op0. */
+               slot->input_registers = 1UL << op1;
+               slot->output_registers = 1UL << op0;
+               break;
+       default:
+               printf("unrecoginzed opc: %s\n", opcode->name);
+               SLJIT_ASSERT_STOP();
+       }
+
+       inst_buf_index++;
+
+       return SLJIT_SUCCESS;
+}
+
+/* Queues a two-operand instruction and records which registers it reads
+   and writes, so that assign_pipes can detect conflicts inside a bundle.
+   If the buffer is already full, one bundle is emitted first to make room.
+   An opcode that is not listed below is reported and hits
+   SLJIT_ASSERT_STOP(); its register masks are left as they were.
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when emitting fails. */
+static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
+{
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       struct jit_instr *slot;
+
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       /* Taken after the possible flush, which changes inst_buf_index. */
+       slot = &inst_buf[inst_buf_index];
+       slot->opcode = opcode;
+       slot->pipe = get_any_valid_pipe(opcode);
+       slot->operand_value[0] = op0;
+       slot->operand_value[1] = op1;
+       slot->line = line;
+
+       /* The masks are built with unsigned shifts: register 63 (ZERO)
+          selects the top bit, which a signed 1L cannot be shifted into
+          without undefined behaviour. */
+       switch (opc) {
+       case TILEGX_OPC_BEQZ:
+       case TILEGX_OPC_BNEZ:
+               /* A branch only tests op0. The output mask is cleared as
+                  well, so that no stale mask of the instruction that used
+                  this slot before is mistaken for a register write. */
+               slot->input_registers = 1UL << op0;
+               slot->output_registers = 0;
+               break;
+       case TILEGX_OPC_ST:
+       case TILEGX_OPC_ST1:
+       case TILEGX_OPC_ST2:
+       case TILEGX_OPC_ST4:
+               /* A store reads both registers and writes none. */
+               slot->input_registers = (1UL << op0) | (1UL << op1);
+               slot->output_registers = 0;
+               break;
+       case TILEGX_OPC_CLZ:
+       case TILEGX_OPC_LD:
+       case TILEGX_OPC_LD1U:
+       case TILEGX_OPC_LD1S:
+       case TILEGX_OPC_LD2U:
+       case TILEGX_OPC_LD2S:
+       case TILEGX_OPC_LD4U:
+       case TILEGX_OPC_LD4S:
+               /* Reads op1, writes op0. */
+               slot->input_registers = 1UL << op1;
+               slot->output_registers = 1UL << op0;
+               break;
+       default:
+               printf("unrecoginzed opc: %s\n", opcode->name);
+               SLJIT_ASSERT_STOP();
+       }
+
+       inst_buf_index++;
+
+       return SLJIT_SUCCESS;
+}
+
+/* Queues an instruction without operands; it is recorded as reading and
+   writing no register. If the buffer is already full, one bundle is
+   emitted first to make room.
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when emitting fails. */
+static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
+{
+       const struct tilegx_opcode* desc = &tilegx_opcodes[opc];
+       struct jit_instr *slot;
+
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       /* Taken after the possible flush, which changes inst_buf_index. */
+       slot = &inst_buf[inst_buf_index];
+       slot->opcode = desc;
+       slot->pipe = get_any_valid_pipe(desc);
+       slot->input_registers = 0;
+       slot->output_registers = 0;
+       slot->line = line;
+
+       inst_buf_index += 1;
+       return SLJIT_SUCCESS;
+}
+
+/* Queues a register-indirect jump (target register op0) and then flushes
+   the pending bundle buffer right away via flush_buffer(), whose result
+   is returned. When the buffer is already full, update_buffer() is run
+   before the jump is queued. */
+static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
+{
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) {
+               FAIL_IF(update_buffer(compiler));
+       }
+
+       inst_buf[inst_buf_index].opcode = &tilegx_opcodes[opc];
+       inst_buf[inst_buf_index].pipe = get_any_valid_pipe(&tilegx_opcodes[opc]);
+       inst_buf[inst_buf_index].line = line;
+
+       /* The jump reads its target register and writes nothing. */
+       inst_buf[inst_buf_index].operand_value[0] = op0;
+       inst_buf[inst_buf_index].output_registers = 0;
+       inst_buf[inst_buf_index].input_registers = 1L << op0;
+
+       inst_buf_index += 1;
+
+       return flush_buffer(compiler);
+}
+
+static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_ins *inst;
+       sljit_ins saved_inst;
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return code_ptr;
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+
+       inst = (sljit_ins *)jump->addr;
+       if (jump->flags & IS_COND)
+               inst--;
+
+       diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
+       if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
+               jump->flags |= PATCH_B;
+
+               if (!(jump->flags & IS_COND)) {
+                       if (jump->flags & IS_JAL) {
+                               jump->flags &= ~(PATCH_B);
+                               jump->flags |= PATCH_J;
+                               inst[0] = JAL_X1;
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(inst);
+#endif
+                       } else {
+                               inst[0] = BEQZ_X1 | SRCA_X1(ZERO);
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(inst);
+#endif
+                       }
+
+                       return inst;
+               }
+
+               inst[0] = inst[0] ^ (0x7L << 55);
+
+#ifdef TILEGX_JIT_DEBUG
+               printf("[runtime relocate]%04d:\t", __LINE__);
+               print_insn_tilegx(inst);
+#endif
+               jump->addr -= sizeof(sljit_ins);
+               return inst;
+       }
+
+       if (jump->flags & IS_COND) {
+               if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
+                       inst[1] = J_X1;
+                       return inst + 1;
+               }
+
+               return code_ptr;
+       }
+
+       if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
+               jump->flags |= PATCH_J;
+
+               if (jump->flags & IS_JAL) {
+                       inst[0] = JAL_X1;
+
+#ifdef TILEGX_JIT_DEBUG
+                       printf("[runtime relocate]%04d:\t", __LINE__);
+                       print_insn_tilegx(inst);
+#endif
+
+               } else {
+                       inst[0] = J_X1;
+
+#ifdef TILEGX_JIT_DEBUG
+                       printf("[runtime relocate]%04d:\t", __LINE__);
+                       print_insn_tilegx(inst);
+#endif
+               }
+
+               return inst;
+       }
+
+       return code_ptr;
+}
+
+/* Produces the final machine code for everything recorded in the compiler.
+
+   Steps, in order:
+     1. allocate executable memory for compiler->size instruction words;
+     2. copy the instruction words from the compiler's buffer fragments,
+        recording the final address of every label, jump and constant as
+        its word index is reached (detect_jump_type() may shorten a jump
+        sequence, which moves code_ptr backwards);
+     3. walk the jump list again and patch branch offsets, jump offsets or
+        the immediate fields of the address-loading instructions;
+     4. mark the compiler as compiled, record the executable size and
+        flush the instruction cache for the generated range.
+
+   Returns a pointer to the generated code. */
+SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins *)buf->memory;
+               /* used_size >> 3: number of instruction words in this fragment
+                  (presumably used_size is in bytes and sljit_ins is 8 bytes). */
+               buf_end = buf_ptr + (buf->used_size >> 3);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               label->addr = (sljit_uw) code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+
+                       if (jump && jump->addr == word_count) {
+                               /* Point jump->addr at a word earlier in the output:
+                                  4 words back for IS_JAL jumps, 3 otherwise. */
+                               if (jump->flags & IS_JAL)
+                                       jump->addr = (sljit_uw)(code_ptr - 4);
+                               else
+                                       jump->addr = (sljit_uw)(code_ptr - 3);
+
+                               /* May return an earlier position when the jump
+                                  could be shortened. */
+                               code_ptr = detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+
+                       if (const_ && const_->addr == word_count) {
+                               /* Just recording the address. */
+                               const_->addr = (sljit_uw) code_ptr;
+                               const_ = const_->next;
+                       }
+
+                       code_ptr++;
+                       word_count++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       /* A label may sit just past the last instruction. */
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw) code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       /* Second pass: all label addresses are final, so patch every jump. */
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins *)jump->addr;
+
+                       if (jump->flags & PATCH_B) {
+                               /* Branch offset relative to the branch itself,
+                                  in 8-byte units. */
+                               addr = (sljit_sw)(addr - (jump->addr)) >> 3;
+                               SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
+                               buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(buf_ptr);
+#endif
+                               break;
+                       }
+
+                       if (jump->flags & PATCH_J) {
+                               SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
+                               /* Jump offset relative to the jump itself,
+                                  in 8-byte units. */
+                               addr = (sljit_sw)(addr - (jump->addr)) >> 3;
+                               buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(buf_ptr);
+#endif
+                               break;
+                       }
+
+                       SLJIT_ASSERT(!(jump->flags & IS_JAL));
+
+                       /* Set the fields of immediate loads. */
+                       /* Three consecutive instructions each receive 16 bits of
+                          the target in the field at bit 43: bits 47..32, then
+                          31..16, then 15..0. */
+                       buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
+                       buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
+                       buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
+               } while (0);
+
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       return code;
+}
+
+/* Loads the constant imm into the (already mapped) register dst_ar using
+   as few instructions as its magnitude allows: one ADDLI for values that
+   fit in 16 signed bits, plus one SHL16INSLI for every further 16-bit
+   chunk (up to four instructions for a full 64-bit value). The top chunk
+   is always loaded first and the lowest chunk last. */
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
+{
+       if (imm >= SIMM_16BIT_MIN && imm <= SIMM_16BIT_MAX) {
+               return ADDLI(dst_ar, ZERO, imm);
+       }
+
+       if (imm >= SIMM_32BIT_MIN && imm <= SIMM_32BIT_MAX) {
+               /* Two chunks. */
+               FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
+       } else if (imm >= SIMM_48BIT_MIN && imm <= SIMM_48BIT_MAX) {
+               /* Three chunks. */
+               FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
+               FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
+       } else {
+               /* Four chunks. */
+               FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
+               FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
+               FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
+       }
+
+       /* Every multi-instruction form ends by inserting the lowest chunk. */
+       return SHL16INSLI(dst_ar, dst_ar, imm);
+}
+
+/* Loads imm into the (already mapped) register dst_ar with a fixed
+   three-instruction sequence: ADDLI of bits 32 and up, followed by two
+   SHL16INSLI steps for the middle and low 16-bit chunks.
+
+   This must *not* be shortened the way load_immediate does it: the pcre
+   relocation mechanism matches this fixed instruction pattern.
+
+   A non-zero flush selects the *_SOLO variants of the instructions. */
+static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
+{
+       if (!flush) {
+               FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
+               FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
+               return SHL16INSLI(dst_ar, dst_ar, imm);
+       }
+
+       FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
+       FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
+       return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
+}
+
+/* Loads the full 64-bit value imm into the register reg_map[dst_ar] with
+   a fixed four-instruction sequence: ADDLI of the top 16 bits, followed by
+   three SHL16INSLI steps for the remaining 16-bit chunks.
+
+   This must *not* be shortened the way load_immediate does it: the pcre
+   relocation mechanism matches this fixed 4-instruction pattern.
+
+   A non-zero flush selects the *_SOLO variants of the instructions. */
+static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
+{
+       if (!flush) {
+               FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
+               FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
+               FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
+               return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
+       }
+
+       FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
+       FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
+       FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
+       return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_ins base;
+       sljit_ins bundle = 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       local_size += (saveds + 1) * sizeof(sljit_sw);
+       local_size = (local_size + 7) & ~7;
+       compiler->local_size = local_size;
+
+       if (local_size <= SIMM_16BIT_MAX) {
+               /* Frequent case. */
+               FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
+               base = SLJIT_LOCALS_REG_mapped;
+       } else {
+               FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
+               FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
+               FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
+               base = TMP_REG2_mapped;
+               local_size = 0;
+       }
+
+       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
+       FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));
+
+       if (saveds >= 1)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8));
+
+       if (saveds >= 2)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8));
+
+       if (saveds >= 3)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8));
+
+       if (saveds >= 4)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8));
+
+       if (saveds >= 5)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8));
+
+       if (args >= 1)
+               FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO));
+
+       if (args >= 2)
+               FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO));
+
+       if (args >= 3)
+               FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO));
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       local_size += (saveds + 1) * sizeof(sljit_sw);
+       compiler->local_size = (local_size + 7) & ~7;
+}
+
+/* Emits the function epilogue.
+
+   After emit_mov_before_return() has handled the return value, RA and the
+   saved registers are reloaded from the slots just below the address
+   base + local_size (RA at -8, then the saved registers at -16, -24, ...),
+   the stack register is restored, and a JR to RA is emitted.
+
+   Returns the result of the final JR, or leaves early through FAIL_IF
+   when emitting an instruction fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si local_size;
+       sljit_ins base;
+       int addr_initialized = 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       local_size = compiler->local_size;
+       if (local_size <= SIMM_16BIT_MAX)
+               base = SLJIT_LOCALS_REG_mapped;
+       else {
+               /* Frame too large for an immediate: compute stack register +
+                  local_size into TMP_REG1 and address relative to that. */
+               FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
+               FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
+               base = TMP_REG1_mapped;
+               local_size = 0;
+       }
+
+       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
+       FAIL_IF(LD(RA, ADDR_TMP_mapped));
+
+       /* Restore saved registers starting from the lowest slot in use.
+          ADDR_TMP is set once, for the first register restored; every
+          LD_ADD is passed +8 so later loads need no new address. */
+       if (compiler->saveds >= 5) {
+               FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48));
+               addr_initialized = 1;
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 4) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40));
+                       addr_initialized = 1;
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 3) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32));
+                       addr_initialized = 1;
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 2) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24));
+                       addr_initialized = 1;
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 1) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16));
+                       /* addr_initialized = 1; no need to initialize as it's the last one. */
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       /* Release the frame. In the large-frame case TMP_REG1 already holds
+          stack register + local_size (computed above). */
+       if (compiler->local_size <= SIMM_16BIT_MAX)
+               FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
+       else
+               FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));
+
+       return JR(RA);
+}
+
+/* reg_ar is an absolute register! */
+
+/* Fast path for a memory operand that is a plain base register (or an
+   absolute address) plus an offset that fits in 16 signed bits, with no
+   index register and no pending write-back on a base register.
+
+   When the fast path applies, an ADDLI computes the address into ADDR_TMP
+   and a single load or store (chosen from data_transfer_insts) follows.
+
+   Return value:
+      0  the operand does not qualify; nothing was emitted;
+      1  the operand qualifies and ARG_TEST was set; nothing was emitted;
+     -1  the instructions were emitted.
+   FAIL_IF may also leave early when emitting an instruction fails. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* Write-back on a real base register needs the slow path. */
+       if ((flags & WRITE_BACK) && (arg & REG_MASK))
+               return 0;
+
+       /* So does an index register. */
+       if (arg & OFFS_REG_MASK)
+               return 0;
+
+       /* And an offset outside the signed 16-bit range. */
+       if (argw > SIMM_16BIT_MAX || argw < SIMM_16BIT_MIN)
+               return 0;
+
+       /* Works for both absolute and relative addresses. */
+       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+               return 1;
+
+       FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));
+
+       if (flags & LOAD_DATA) {
+               FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
+       } else {
+               FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
+       }
+
+       return -1;
+}
+
+/* Tells whether the address computed for (arg, argw) can be reused for
+   the following operand (next_arg, next_argw). Returns 1 if so, else 0.
+   See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw delta;
+
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       if (arg & OFFS_REG_MASK) {
+               /* Indexed form: only the low two bits of argw (the shift
+                  amount) matter. A shifted index is reusable when the shift
+                  is non-zero and equal, and the index register matches. */
+               argw &= 0x3;
+               next_argw &= 0x3;
+
+               if (!argw || argw != next_argw)
+                       return 0;
+
+               return (arg == next_arg
+                               || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)) ? 1 : 0;
+       }
+
+       /* Base + offset form: same operand and a small offset distance. */
+       if (arg != next_arg)
+               return 0;
+
+       delta = next_argw - argw;
+       return (delta <= SIMM_16BIT_MAX && delta >= SIMM_16BIT_MIN) ? 1 : 0;
+}
+
+/* Emit the necessary instructions for a memory access that
+   getput_arg_fast could not handle. See can_cache above.
+
+   flags     selects the transfer (flags & MEM_MASK indexes
+             data_transfer_insts), load vs. store (LOAD_DATA) and base
+             register write-back (WRITE_BACK).
+   reg_ar    absolute register that is loaded or stored.
+   arg/argw  the memory operand being accessed.
+   next_arg/next_argw
+             the operand that will be accessed next; when it is a memory
+             operand close to this one, the computed address is left in
+             TMP_REG3 and remembered in compiler->cache_arg / cache_argw
+             so it can be reused.
+
+   Returns the result of the final load/store emission, or leaves early
+   through FAIL_IF when emitting an instruction fails. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_ar, base;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       /* A load may build its address in its own destination register. */
+       if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
+               tmp_ar = reg_ar;
+       else
+               tmp_ar = TMP_REG1_mapped;
+
+       base = arg & REG_MASK;
+
+       /* Base register + (index register << shift). */
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3;
+
+               if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
+                       /* The stored register is also the base that is about
+                          to be updated: keep a copy of its value. */
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
+                       FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
+                       reg_ar = TMP_REG1_mapped;
+               }
+
+               /* Using the cache. */
+               if (argw == compiler->cache_argw) {
+                       if (!(flags & WRITE_BACK)) {
+                               if (arg == compiler->cache_arg) {
+                                       if (flags & LOAD_DATA)
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+                                       else
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+                               }
+
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                                               compiler->cache_arg = arg;
+                                               compiler->cache_argw = argw;
+                                               FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
+                                               if (flags & LOAD_DATA)
+                                                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+                                               else
+                                                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+                                       }
+
+                                       FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
+                                       if (flags & LOAD_DATA)
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
+                                       else
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
+                               }
+                       } else {
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
+                                       if (flags & LOAD_DATA)
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
+                                       else
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
+                               }
+                       }
+               }
+
+               /* Cache miss: compute the shifted index into TMP_REG3. */
+               if (SLJIT_UNLIKELY(argw)) {
+                       compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                       compiler->cache_argw = argw;
+                       FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
+               }
+
+               if (!(flags & WRITE_BACK)) {
+                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                               compiler->cache_arg = arg;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
+                               tmp_ar = TMP_REG3_mapped;
+                       } else
+                               FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
+
+                       if (flags & LOAD_DATA)
+                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
+                       else
+                               return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
+               }
+
+               FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
+
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
+       }
+
+       /* Base register + immediate offset, with base write-back. */
+       if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
+               /* Update only applies if a base register exists. */
+               if (reg_ar == reg_map[base]) {
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);
+                       if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
+                               /* Access through ADDR_TMP first, then update the base. */
+                               FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
+                               if (flags & LOAD_DATA)
+                                       FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
+                               else
+                                       FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
+
+                               if (argw)
+                                       return ADDLI(reg_map[base], reg_map[base], argw);
+
+                               return SLJIT_SUCCESS;
+                       }
+
+                       FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
+                       reg_ar = TMP_REG1_mapped;
+               }
+
+               if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
+                       if (argw)
+                               FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
+               } else {
+                       if (compiler->cache_arg == SLJIT_MEM
+                                       && argw - compiler->cache_argw <= SIMM_16BIT_MAX
+                                       && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
+                               if (argw != compiler->cache_argw) {
+                                       /* The adjustment is an immediate, so ADDLI is
+                                          required here (ADD takes a register as its
+                                          third operand). */
+                                       FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
+                                       compiler->cache_argw = argw;
+                               }
+
+                               FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
+                       } else {
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
+                               FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
+                       }
+               }
+
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
+       }
+
+       /* TMP_REG3 already holds an address for this same operand:
+          adjust it by the (small) offset difference. */
+       if (compiler->cache_arg == arg
+                       && argw - compiler->cache_argw <= SIMM_16BIT_MAX
+                       && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
+               if (argw != compiler->cache_argw) {
+                       FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
+                       compiler->cache_argw = argw;
+               }
+
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+       }
+
+       /* TMP_REG3 holds a nearby bare offset (cache_arg == SLJIT_MEM):
+          adjust it; otherwise load the offset from scratch. */
+       if (compiler->cache_arg == SLJIT_MEM
+                       && argw - compiler->cache_argw <= SIMM_16BIT_MAX
+                       && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
+               if (argw != compiler->cache_argw)
+                       FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
+       } else {
+               compiler->cache_arg = SLJIT_MEM;
+               FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
+       }
+
+       compiler->cache_argw = argw;
+
+       /* Absolute address: TMP_REG3 is the address itself. */
+       if (!base) {
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+       }
+
+       /* The next operand is close by: keep the full address in TMP_REG3. */
+       if (arg == next_arg
+                       && next_argw - argw <= SIMM_16BIT_MAX
+                       && next_argw - argw >= SIMM_16BIT_MIN) {
+               compiler->cache_arg = arg;
+               FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+       }
+
+       FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));
+
+       if (flags & LOAD_DATA)
+               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
+       else
+               return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
+}
+
+/* Emits a single memory access for (arg, argw) to or from reg_ar.
+   The fast path is tried first; if it does not apply, the address cache
+   is reset and the general path is used with no following operand. */
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       if (!getput_arg_fast(compiler, flags, reg_ar, arg, argw)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+               return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
+       }
+
+       /* The fast path handled the access, or failed while emitting it. */
+       return compiler->error;
+}
+
+/* Like emit_op_mem, but keeps the current address cache and passes the
+   following operand (arg2, arg2w) on to getput_arg so the computed
+   address can be kept for reuse. */
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (!getput_arg_fast(compiler, flags, reg, arg1, arg1w)) {
+               return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+       }
+
+       return compiler->error;
+}
+
+/* Stores the return address (RA) into dst: copied with an ADD when dst
+   is a register, written with a word store when dst is memory. Nothing
+   is emitted when dst is SLJIT_UNUSED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* Uncommon, but possible: no destination at all. This must be
+          tested before the register check. */
+       if (dst == SLJIT_UNUSED) {
+               return SLJIT_SUCCESS;
+       }
+
+       if (!FAST_IS_REG(dst)) {
+               /* Memory. */
+               return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
+       }
+
+       return ADD(reg_map[dst], RA, ZERO);
+}
+
+/* Loads the return address from src into RA (register copy, word load
+   from memory, or immediate) and emits a JR through RA. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src)) {
+               FAIL_IF(ADD(RA, reg_map[src], ZERO));
+       } else if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));
+       } else if (src & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, RA, srcw));
+       }
+
+       return JR(RA);
+}
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       sljit_si overflow_ra = 0;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return ADD(reg_map[dst], reg_map[src2], ZERO);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SI)
+                               return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);
+
+               return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
+               } else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB)
+                               return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);
+
+                       return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
+               } else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH)
+                               return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);
+
+                       return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
+               } else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
+                               if (src2 < 0)
+                                       FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));
+
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(ORI(ULESS_FLAG ,reg_map[src1], src2));
+                               else {
+                                       FAIL_IF(ADDLI(ULESS_FLAG ,ZERO, src2));
+                                       FAIL_IF(OR(ULESS_FLAG,reg_map[src1],ULESS_FLAG));
+                               }
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
+
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));
+
+                               if (src2 < 0)
+                                       FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
+                       }
+               } else {
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
+                               FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
+
+                               if (src1 != dst)
+                                       overflow_ra = reg_map[src1];
+                               else if (src2 != dst)
+                                       overflow_ra = reg_map[src2];
+                               else {
+                                       /* Rare ocasion. */
+                                       FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
+                                       overflow_ra = TMP_EREG2;
+                               }
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(ADD(EQUAL_FLAG ,reg_map[src1], reg_map[src2]));
+
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(OR(ULESS_FLAG,reg_map[src1], reg_map[src2]));
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(ADD(reg_map[dst],reg_map[src1], reg_map[src2]));
+
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(XOR(OVERFLOW_FLAG,reg_map[dst], overflow_ra));
+                               FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
+                       }
+               }
+
+               /* a + b >= a | b (otherwise, the carry should be set to 1). */
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(CMPLTU(ULESS_FLAG ,reg_map[dst] ,ULESS_FLAG));
+
+               if (op & SLJIT_SET_O)
+                       return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADDC:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2));
+                               else {
+                                       FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2));
+                                       FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1));
+                               }
+                       }
+
+                       FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
+
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2]));
+
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1));
+
+               FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG));
+
+               if (!(op & SLJIT_SET_C))
+                       return SLJIT_SUCCESS;
+
+               /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */
+               FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1));
+               FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG));
+               /* Set carry flag. */
+               return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);
+
+       case SLJIT_SUB:
+               if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
+                       FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(SHRUI(TMP_EREG1,reg_map[src1], 63));
+
+                               if (src2 < 0)
+                                       FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
+
+                               if (src1 != dst)
+                                       overflow_ra = reg_map[src1];
+                               else {
+                                       /* Rare ocasion. */
+                                       FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
+       
+                                       overflow_ra = TMP_EREG2;
+                               }
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));
+
+                       if (op & SLJIT_SET_C) {
+                               FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
+                               FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
+
+               } else {
+
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
+                               FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
+
+                               if (src1 != dst)
+                                       overflow_ra = reg_map[src1];
+                               else {
+                                       /* Rare ocasion. */
+                                       FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
+                                       overflow_ra = TMP_EREG2;
+                               }
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));
+
+                       if (op & (SLJIT_SET_U | SLJIT_SET_C))
+                               FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));
+
+                       if (op & SLJIT_SET_U)
+                               FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));
+
+                       if (op & SLJIT_SET_S) {
+                               FAIL_IF(CMPLTS(LESS_FLAG ,reg_map[src1] ,reg_map[src2]));
+                               FAIL_IF(CMPLTS(GREATER_FLAG ,reg_map[src2] ,reg_map[src1]));
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+                               FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
+               }
+
+               if (op & SLJIT_SET_O) {
+                       FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
+                       FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
+                       return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
+               }
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_SUBC:
+               if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
+                       FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
+                               FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
+
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));
+                               /* dst may be the same as src1 or src2. */
+                       FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));
+
+               FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));
+
+               return SLJIT_SUCCESS;
+
+#define EMIT_LOGICAL(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
+                               ADDR_TMP_mapped, __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, reg_map[dst], reg_map[src1], \
+                               ADDR_TMP_mapped, __LINE__)); \
+       } else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
+                               reg_map[src2], __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, reg_map[dst], reg_map[src1], \
+                               reg_map[src2], __LINE__)); \
+       }
+
+       case SLJIT_AND:
+               EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_OR:
+               EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_XOR:
+               EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
+               return SLJIT_SUCCESS;
+
+#define EMIT_SHIFT(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
+                               src2 & 0x3F, __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_imm, reg_map[dst], reg_map[src1], \
+                               src2 & 0x3F, __LINE__)); \
+       } else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_imm, reg_map[dst], reg_map[src1], \
+                               src2 & 0x3F, __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, reg_map[dst], reg_map[src1], \
+                               reg_map[src2], __LINE__)); \
+       }
+
+       case SLJIT_SHL:
+               EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
+               return SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
+{
+       /* Brings both operands into registers (or keeps a small immediate as
+          is), calls emit_single_op() for the operation itself and stores the
+          result when dst is a memory operand.
+          arg1 goes to TMP_REG1 or src reg.
+          arg2 goes to TMP_REG2, imm or src reg.
+          TMP_REG3 can be used for caching.
+          result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3.
+          Returns SLJIT_SUCCESS, or leaves early through FAIL_IF when one of
+          the emitters fails. */
+       sljit_si dst_r = TMP_REG2; /* register index that receives the result */
+       sljit_si src1_r; /* register index holding the first operand */
+       sljit_sw src2_r = 0; /* register index, or the immediate itself when SRC2_IMM is set */
+       sljit_si sugg_src2_r = TMP_REG2; /* register the second operand is loaded into */
+
+       if (!(flags & ALT_KEEP_CACHE)) { /* forget the cached address unless the caller asked to keep it */
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) /* a move without destination and without memory source emits nothing */
+                       return SLJIT_SUCCESS;
+               if (GET_FLAGS(op))
+                       flags |= UNUSED_DEST;
+       } else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) /* moves load the source directly into dst */
+                       sugg_src2_r = dst_r;
+       } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw)) /* ARG_TEST probe: a zero result marks the store as slow */
+               flags |= SLOW_DEST;
+
+       if (flags & IMM_OP) {
+               if ((src2 & SLJIT_IMM) && src2w) {
+                       if ((!(flags & LOGICAL_OP)
+                                       && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
+                                       || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) { /* immediate is within the 16 bit range accepted for this kind of operation */
+                               flags |= SRC2_IMM;
+                               src2_r = src2w;
+                       }
+               }
+
+               if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { /* operand order is free here, so an immediate first operand may take the immediate slot */
+                       if ((!(flags & LOGICAL_OP)
+                                       && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
+                                       || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
+                               flags |= SRC2_IMM;
+                               src2_r = src1w;
+
+                               /* And swap arguments. */
+                               src1 = src2;
+                               src1w = src2w;
+                               src2 = SLJIT_IMM;
+                               /* src2w = src2_r unneeded. */
+                       }
+               }
+       }
+
+       /* Source 1: a register is used in place, a non-zero immediate is loaded
+          into TMP_REG1, a zero immediate uses register index 0, and a memory
+          operand is loaded into TMP_REG1 (marked SLOW_SRC1 when the fast
+          path cannot handle it). */
+       if (FAST_IS_REG(src1)) {
+               src1_r = src1;
+               flags |= REG1_SOURCE;
+       } else if (src1 & SLJIT_IMM) {
+               if (src1w) {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
+                       src1_r = TMP_REG1;
+               } else
+                       src1_r = 0; /* NOTE(review): presumably reg_map[0] is the zero register - confirm */
+       } else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+               src1_r = TMP_REG1;
+       }
+
+       /* Source 2: same scheme as source 1, except that the value is loaded
+          into sugg_src2_r and an immediate already accepted as SRC2_IMM
+          needs no instruction. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) /* move to a non-register dst: use the source register as the result register */
+                       dst_r = src2_r;
+       } else if (src2 & SLJIT_IMM) {
+               if (!(flags & SRC2_IMM)) {
+                       if (src2w) {
+                               FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
+                               src2_r = sugg_src2_r;
+                       } else {
+                               src2_r = 0;
+                               if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) /* storing zero: the store below reads register index 0 */
+                                       dst_r = 0;
+                       }
+               }
+       } else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+               src2_r = sugg_src2_r;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { /* both loads are slow: order them by what can_cache() reports; the last two getput_arg arguments name the access that follows */
+               SLJIT_ASSERT(src2_r == TMP_REG2);
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
+               } else {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
+               }
+       } else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); /* operands are in place: emit the operation itself */
+
+       if (dst & SLJIT_MEM) { /* write the result back to memory */
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
+                       return compiler->error;
+               }
+
+               return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Turns the condition `type` into a value taken from the flag registers.
+   For opcodes below SLJIT_ADD the value is written to dst; for SLJIT_ADD
+   and above it is placed in TMP_REG2 and combined with src through
+   emit_op(). Conditions with the lowest bit of `type` set are produced by
+   XOR-ing the value with 1. Nothing is emitted when dst is SLJIT_UNUSED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type)
+{
+       sljit_si wanted_ar, value_ar;
+       sljit_si flag_bits = GET_ALL_FLAGS(op);
+       sljit_si combines;
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       combines = (op >= SLJIT_ADD);
+
+       /* A plain store into a register can be computed in place. */
+       if (!combines && FAST_IS_REG(dst))
+               wanted_ar = reg_map[dst];
+       else
+               wanted_ar = reg_map[TMP_REG2];
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       /* The memory operand of a combining operation is loaded up front. */
+       if (combines && (src & SLJIT_MEM)) {
+               ADJUST_LOCAL_OFFSET(src, srcw);
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       /* Select (or compute) the register that holds the condition value. */
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_NOT_EQUAL:
+               FAIL_IF(CMPLTUI(wanted_ar, EQUAL_FLAG, 1));
+               value_ar = wanted_ar;
+               break;
+
+       case SLJIT_C_MUL_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               FAIL_IF(CMPLTUI(wanted_ar, OVERFLOW_FLAG, 1));
+               value_ar = wanted_ar;
+               /* Flip type bit for the XORI below. */
+               type ^= 0x1;
+               break;
+
+       case SLJIT_C_LESS:
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_LESS:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               value_ar = ULESS_FLAG;
+               break;
+
+       case SLJIT_C_GREATER:
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_GREATER:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               value_ar = UGREATER_FLAG;
+               break;
+
+       case SLJIT_C_SIG_LESS:
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               value_ar = LESS_FLAG;
+               break;
+
+       case SLJIT_C_SIG_GREATER:
+       case SLJIT_C_SIG_LESS_EQUAL:
+               value_ar = GREATER_FLAG;
+               break;
+
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_NOT_OVERFLOW:
+               value_ar = OVERFLOW_FLAG;
+               break;
+
+       case SLJIT_C_FLOAT_EQUAL:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               value_ar = EQUAL_FLAG;
+               break;
+
+       default:
+               SLJIT_ASSERT_STOP();
+               value_ar = wanted_ar;
+               break;
+       }
+
+       /* Odd condition codes are the inverted form of the value. */
+       if (type & 0x1) {
+               FAIL_IF(XORI(wanted_ar, value_ar, 1));
+               value_ar = wanted_ar;
+       }
+
+       if (combines) {
+               if (value_ar != TMP_REG2_mapped)
+                       FAIL_IF(ADD(TMP_REG2_mapped, value_ar, ZERO));
+               return emit_op(compiler, op | flag_bits, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem(compiler, WORD_DATA, value_ar, dst, dstw);
+
+       /* Register destination: copy only when the value lives elsewhere. */
+       if (value_ar != wanted_ar)
+               return ADD(wanted_ar, value_ar, ZERO);
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits an operation without operands. SLJIT_NOP queues TILEGX_OPC_FNOP and
+   SLJIT_BREAKPOINT queues BPT. The multiply and divide opcodes only reach
+   SLJIT_ASSERT_STOP() and emit nothing; every other opcode emits nothing
+   either. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       op = GET_OPCODE(op);
+
+       if (op == SLJIT_NOP)
+               return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);
+
+       if (op == SLJIT_BREAKPOINT)
+               return PI(BPT);
+
+       /* No code is generated for these opcodes. */
+       if (op == SLJIT_UMUL || op == SLJIT_SMUL || op == SLJIT_UDIV || op == SLJIT_SDIV) {
+               SLJIT_ASSERT_STOP();
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a one-operand operation (moves, NOT, NEG, CLZ).
+   Every move variant is forwarded to emit_op() as its plain SLJIT_MOV*
+   opcode together with the data-size flags; the SLJIT_MOVU* variants add
+   WRITE_BACK. Immediate sources of the byte and half-word moves are first
+   narrowed to the moved type. SLJIT_NEG is emitted as SLJIT_SUB with an
+   immediate zero as first operand. Unknown opcodes emit nothing. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
+{
+       sljit_si mov_op;
+       sljit_si data_flags;
+       sljit_si src_is_imm;
+
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       src_is_imm = (src & SLJIT_IMM) != 0;
+
+       switch (GET_OPCODE(op)) {
+       /* Operations that keep the flag requests carried in op. */
+       case SLJIT_NOT:
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+               return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+
+       /* Plain moves. */
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               mov_op = SLJIT_MOV;
+               data_flags = WORD_DATA;
+               break;
+
+       case SLJIT_MOV_UI:
+               mov_op = SLJIT_MOV_UI;
+               data_flags = INT_DATA;
+               break;
+
+       case SLJIT_MOV_SI:
+               mov_op = SLJIT_MOV_SI;
+               data_flags = INT_DATA | SIGNED_DATA;
+               break;
+
+       case SLJIT_MOV_UB:
+               mov_op = SLJIT_MOV_UB;
+               data_flags = BYTE_DATA;
+               if (src_is_imm)
+                       srcw = (sljit_ub) srcw;
+               break;
+
+       case SLJIT_MOV_SB:
+               mov_op = SLJIT_MOV_SB;
+               data_flags = BYTE_DATA | SIGNED_DATA;
+               if (src_is_imm)
+                       srcw = (sljit_sb) srcw;
+               break;
+
+       case SLJIT_MOV_UH:
+               mov_op = SLJIT_MOV_UH;
+               data_flags = HALF_DATA;
+               if (src_is_imm)
+                       srcw = (sljit_uh) srcw;
+               break;
+
+       case SLJIT_MOV_SH:
+               mov_op = SLJIT_MOV_SH;
+               data_flags = HALF_DATA | SIGNED_DATA;
+               if (src_is_imm)
+                       srcw = (sljit_sh) srcw;
+               break;
+
+       /* Moves with WRITE_BACK. */
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+               mov_op = SLJIT_MOV;
+               data_flags = WORD_DATA | WRITE_BACK;
+               break;
+
+       case SLJIT_MOVU_UI:
+               mov_op = SLJIT_MOV_UI;
+               data_flags = INT_DATA | WRITE_BACK;
+               break;
+
+       case SLJIT_MOVU_SI:
+               mov_op = SLJIT_MOV_SI;
+               data_flags = INT_DATA | SIGNED_DATA | WRITE_BACK;
+               break;
+
+       case SLJIT_MOVU_UB:
+               mov_op = SLJIT_MOV_UB;
+               data_flags = BYTE_DATA | WRITE_BACK;
+               if (src_is_imm)
+                       srcw = (sljit_ub) srcw;
+               break;
+
+       case SLJIT_MOVU_SB:
+               mov_op = SLJIT_MOV_SB;
+               data_flags = BYTE_DATA | SIGNED_DATA | WRITE_BACK;
+               if (src_is_imm)
+                       srcw = (sljit_sb) srcw;
+               break;
+
+       case SLJIT_MOVU_UH:
+               mov_op = SLJIT_MOV_UH;
+               data_flags = HALF_DATA | WRITE_BACK;
+               if (src_is_imm)
+                       srcw = (sljit_uh) srcw;
+               break;
+
+       case SLJIT_MOVU_SH:
+               mov_op = SLJIT_MOV_SH;
+               data_flags = HALF_DATA | SIGNED_DATA | WRITE_BACK;
+               if (src_is_imm)
+                       srcw = (sljit_sh) srcw;
+               break;
+
+       default:
+               return SLJIT_SUCCESS;
+       }
+
+       /* Shared tail of the move family: first operand slot is TMP_REG1. */
+       return emit_op(compiler, mov_op, data_flags, dst, dstw, TMP_REG1, 0, src, srcw);
+}
+
+/* Emits a two-operand operation by forwarding it to emit_op() with the
+   operand properties of the opcode: CUMULATIVE_OP when the operands may be
+   swapped, LOGICAL_OP for the bitwise operations and IMM_OP when a small
+   immediate may be used directly. Unknown opcodes emit nothing.
+   NOTE(review): SLJIT_MUL is forwarded as well, but emit_single_op() has no
+   SLJIT_MUL case in this revision - confirm multiplication is unsupported. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+       case SLJIT_ADDC:
+               return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB:
+       case SLJIT_SUBC:
+               return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_MUL:
+               return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+       case SLJIT_ASHR:
+               /* Only an immediate shift count is reduced to the operand width
+                  (5 bits with SLJIT_INT_OP, 6 bits otherwise). For any other
+                  operand src2w is not a count (e.g. a memory offset) and must
+                  be left untouched. */
+               if (src2 & SLJIT_IMM)
+                       src2w &= (op & SLJIT_INT_OP) ? 0x1f : 0x3f;
+
+               return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Returns a label for the current position of the instruction stream.
+   The pending buffer is flushed first. When the most recent label was
+   created at the same compiler->size it is returned again; otherwise a new
+   label is allocated with ensure_abuf() and registered with set_label(). */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *previous;
+       struct sljit_label *fresh;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       /* Reuse the last label if nothing was emitted since it was created. */
+       previous = compiler->last_label;
+       if (previous && previous->size == compiler->size)
+               return previous;
+
+       fresh = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!fresh);
+
+       set_label(fresh, compiler);
+       return fresh;
+}
+
+/* Emits an indirect jump or call to the target described by (src, srcw).
+   The pending buffer is flushed first and every instruction is emitted
+   with the *_SOLO emitters.
+   - type >= SLJIT_CALL0: the target is placed in PIC_ADDR_REG, register 0
+     receives SLJIT_SCRATCH_REG1, register 54 is lowered by 16 around the
+     JALR and restored afterwards.
+   - immediate target otherwise: a jump record (JUMP_ADDR, plus IS_JAL for
+     type >= SLJIT_FAST_CALL) is created, the target constant is emitted
+     into TMP_REG2 and the jump goes through JR.
+   - register or memory target otherwise: a plain JR through the register
+     (memory targets are loaded into TMP_REG2 first). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       sljit_si src_r = TMP_REG2;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src)) {
+               /* A source mapped to machine register 0 is copied to TMP_REG2. */
+               if (reg_map[src] != 0)
+                       src_r = src;
+               else
+                       FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
+       }
+
+       if (type >= SLJIT_CALL0) {
+               SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
+               if (src & (SLJIT_IMM | SLJIT_MEM)) {
+                       if (src & SLJIT_IMM)
+                               FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
+                       else {
+                               SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
+                               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+                       }
+
+                       FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
+
+                       FAIL_IF(ADDI_SOLO(54, 54, -16));
+
+                       FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));
+
+                       return ADDI_SOLO(54, 54, 16);
+               }
+
+               /* Register input. */
+               if (type >= SLJIT_CALL1)
+                       FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
+
+               FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));
+
+               FAIL_IF(ADDI_SOLO(54, 54, -16));
+
+               FAIL_IF(JALR_SOLO(reg_map[src_r]));
+
+               return ADDI_SOLO(54, 54, 16);
+       }
+
+       if (src & SLJIT_IMM) {
+               struct sljit_jump *jump;
+
+               jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
+               jump->u.target = srcw;
+               FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
+
+               /* Fast calls get one extra instruction in front of the jump. */
+               if (type >= SLJIT_FAST_CALL)
+                       FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
+
+               /* The record points at the jump instruction itself. */
+               jump->addr = compiler->size;
+               FAIL_IF(JR_SOLO(reg_map[src_r]));
+
+               return SLJIT_SUCCESS;
+       }
+
+       if (src & SLJIT_MEM)
+               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+
+       /* No jump record exists on this path, so nothing is left to patch. */
+       return JR_SOLO(reg_map[src_r]);
+}
+
+#define BR_Z(src) /* expands to TWO statements that assign the caller's `inst` (BEQZ_X1 with src as SRCA_X1 operand) and `flags` (IS_COND); use it only as a full statement, never as the unbraced body of an if/else */ \
+       inst = BEQZ_X1 | SRCA_X1(src); \
+       flags = IS_COND;
+
+#define BR_NZ(src) /* same two-statement form as BR_Z, but selects BNEZ_X1 */ \
+       inst = BNEZ_X1 | SRCA_X1(src); \
+       flags = IS_COND;
+
+/* Emits a jump or call whose target is supplied later and returns the
+   sljit_jump record that describes it.
+
+   type: the jump kind in the low 8 bits, optionally or-ed with
+         SLJIT_REWRITABLE_JUMP (that bit is copied into the jump's flags
+         before the value is masked down to 0xff).
+
+   Emitted sequence: for the condition codes listed in the switch, one
+   conditional branch on the matching flag register; then the target-load
+   sequence produced by emit_const() into TMP_REG2_mapped; then a JR (for
+   type <= SLJIT_JUMP) or an argument move plus JALR (for everything else).
+   jump->addr is set to compiler->size right before the final jump
+   instruction is emitted.
+
+   NOTE(review): the PTR_FAIL_IF() checks presumably return NULL on
+   allocation or emission failure - confirm against the macro definition. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins inst;
+       sljit_si flags = 0;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       /* Each condition selects the branch of the opposite sense (for example
+          SLJIT_C_EQUAL uses BR_NZ on EQUAL_FLAG).
+          NOTE(review): presumably the emitted branch skips over the jump
+          sequence when the condition does not hold - confirm. */
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               BR_NZ(EQUAL_FLAG);
+               break;
+       case SLJIT_C_NOT_EQUAL:
+       case SLJIT_C_FLOAT_EQUAL:
+               BR_Z(EQUAL_FLAG);
+               break;
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               BR_Z(ULESS_FLAG);
+               break;
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               BR_NZ(ULESS_FLAG);
+               break;
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               BR_Z(UGREATER_FLAG);
+               break;
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               BR_NZ(UGREATER_FLAG);
+               break;
+       case SLJIT_C_SIG_LESS:
+               BR_Z(LESS_FLAG);
+               break;
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               BR_NZ(LESS_FLAG);
+               break;
+       case SLJIT_C_SIG_GREATER:
+               BR_Z(GREATER_FLAG);
+               break;
+       case SLJIT_C_SIG_LESS_EQUAL:
+               BR_NZ(GREATER_FLAG);
+               break;
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_MUL_OVERFLOW:
+               BR_Z(OVERFLOW_FLAG);
+               break;
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               BR_NZ(OVERFLOW_FLAG);
+               break;
+       default:
+               /* Not conditional branch. */
+               inst = 0;
+               break;
+       }
+
+       jump->flags |= flags;
+
+       /* inst is non-zero only when one of the conditional cases above was hit.
+          NOTE(review): the branch offset (5 or 6) presumably counts the
+          instructions of the jump sequence to skip - confirm. */
+       if (inst) {
+               inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
+               PTR_FAIL_IF(PI(inst));
+       }
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
+       if (type <= SLJIT_JUMP) {
+               jump->addr = compiler->size;
+               PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
+       } else {
+               SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
+               /* Cannot be optimized out if type is >= CALL0. */
+               jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
+               /* Copy SLJIT_SCRATCH_REG1 into machine register 0 before the call. */
+               PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
+               jump->addr = compiler->size;
+               PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
+       }
+
+       return jump;
+}
+
+/* Reports whether this backend can emit floating point operations.
+   It always returns 0 (not available). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+       return 0;
+}
+
+/* Single-operand floating point operation. This backend reports no FPU
+   (sljit_is_fpu_available() returns 0), so this entry point is not expected
+   to be called; SLJIT_ASSERT_STOP() flags such a call. An explicit error
+   code is returned afterwards so that control never reaches the end of a
+   non-void function without a value when the assertion compiles to nothing.
+   Returns SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
+{
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Two-operand floating point operation. This backend reports no FPU
+   (sljit_is_fpu_available() returns 0), so this entry point is not expected
+   to be called; SLJIT_ASSERT_STOP() flags such a call. An explicit error
+   code is returned afterwards so that control never reaches the end of a
+   non-void function without a value when the assertion compiles to nothing.
+   Returns SLJIT_ERR_UNSUPPORTED. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
+{
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Emits code that loads init_value into dst and returns the sljit_const
+   record allocated for it.
+
+   dst, dstw:  destination operand; a register is loaded directly, a memory
+               operand is written through TMP_REG2.
+   init_value: the value loaded by the emitted sequence.
+
+   NOTE(review): the record is presumably what lets sljit_set_const() patch
+   the value after code generation - confirm against set_const().
+   NOTE(review): the PTR_FAIL_IF() checks presumably return NULL on
+   failure - confirm against the macro definition. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       /* Load straight into dst when it is a register, otherwise into TMP_REG2. */
+       reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));
+
+       /* Memory destination: store the value just loaded into TMP_REG2. */
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+       return const_;
+}
+
+/* Rewrites the target of an already emitted jump.
+
+   addr:     address of three consecutive instruction words; each one holds
+             a 16-bit immediate field starting at bit 43.
+   new_addr: new target; its bits 47..32, 31..16 and 15..0 are stored into
+             the first, second and third word respectively.
+
+   SLJIT_CACHE_FLUSH is invoked on the three patched words afterwards. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *code = (sljit_ins *)addr;
+       sljit_si idx;
+       sljit_si shift = 32;
+
+       /* Most significant 16-bit chunk first. */
+       for (idx = 0; idx < 3; idx++, shift -= 16)
+               code[idx] = (code[idx] & ~(0xFFFFL << 43)) | (((new_addr >> shift) & 0xffff) << 43);
+
+       SLJIT_CACHE_FLUSH(code, code + 3);
+}
+
+/* Rewrites the value loaded by an already emitted constant sequence.
+
+   addr:         address of four consecutive instruction words; each one
+                 holds a 16-bit immediate field starting at bit 43.
+   new_constant: new value; its bits 63..48, 47..32, 31..16 and 15..0 are
+                 stored into the first to fourth word respectively.
+
+   SLJIT_CACHE_FLUSH is invoked on the four patched words afterwards. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *code = (sljit_ins *)addr;
+       sljit_si idx;
+       sljit_si shift = 48;
+
+       /* Most significant 16-bit chunk first. */
+       for (idx = 0; idx < 4; idx++, shift -= 16)
+               code[idx] = (code[idx] & ~(0xFFFFL << 43)) | (((new_constant >> shift) & 0xFFFFL) << 43);
+
+       SLJIT_CACHE_FLUSH(code, code + 4);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeX86_32.c b/ext/pcre/pcrelib/sljit/sljitNativeX86_32.c
new file mode 100644 (file)
index 0000000..dd03f26
--- /dev/null
@@ -0,0 +1,557 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* x86 32-bit arch dependent functions. */
+
+/* Emits a one-byte opcode followed by a machine-word immediate.
+
+   opcode: the opcode byte to write.
+   imm:    the immediate stored right after it (sizeof(sljit_sw) bytes).
+
+   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF() when ensure_buf()
+   yields no buffer.
+   NOTE(review): ensure_buf() is asked for one byte more than INC_SIZE()
+   records; presumably INC_SIZE() stores a length byte first - confirm. */
+static sljit_si emit_do_imm(struct sljit_compiler *compiler, sljit_ub opcode, sljit_sw imm)
+{
+       sljit_ub *inst;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
+       FAIL_IF(!inst);
+       INC_SIZE(1 + sizeof(sljit_sw));
+       *inst++ = opcode;
+       /* Word-sized store through a byte pointer; the address may be unaligned. */
+       *(sljit_sw*)inst = imm;
+       return SLJIT_SUCCESS;
+}
+
+/* Writes the opcode of a jump with a 32-bit relative displacement at
+   code_ptr and reserves the four displacement bytes.
+
+   jump:     jump record; jump->addr is advanced past the opcode bytes.
+   code_ptr: where the instruction is written.
+   type:     SLJIT_JUMP, a call type (>= SLJIT_FAST_CALL), or otherwise a
+             condition that get_jump_code() translates.
+
+   Returns the position right after the displacement field. */
+static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type)
+{
+       sljit_si opcode_size;
+
+       if (type == SLJIT_JUMP || type >= SLJIT_FAST_CALL) {
+               /* Unconditional jump or call: a single opcode byte. */
+               code_ptr[0] = (type == SLJIT_JUMP) ? JMP_i32 : CALL_i32;
+               opcode_size = 1;
+       }
+       else {
+               /* Conditional jump: GROUP_0F followed by the condition opcode. */
+               code_ptr[0] = GROUP_0F;
+               code_ptr[1] = get_jump_code(type);
+               opcode_size = 2;
+       }
+       code_ptr += opcode_size;
+       jump->addr += opcode_size;
+
+       /* A known target gets its displacement now, computed from the end of
+          the 4-byte field; a label target is only flagged with PATCH_MW and
+          its displacement bytes are left unwritten here. */
+       if (!(jump->flags & JUMP_LABEL))
+               *(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4);
+       else
+               jump->flags |= PATCH_MW;
+
+       return code_ptr + 4;
+}
+
+/* Emits the function prologue and records the frame layout in `compiler`.
+
+   args:       number of incoming arguments moved into saved registers
+               (the code handles up to three).
+   scratches:  number of scratch registers; those beyond three get a stack
+               word each.
+   saveds:     number of saved registers; up to three are pushed, those
+               beyond three get a stack word each.
+   local_size: bytes of local storage requested by the caller.
+
+   The prologue pushes TMP_REG1 and the saved registers, loads the
+   arguments, and finally subtracts the computed frame size from
+   SLJIT_LOCALS_REG. Returns the status of that last emission, or leaves
+   early through FAIL_IF() when an earlier step fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si size;
+       sljit_si locals_offset;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+       compiler->args = args;
+       compiler->flags_saved = 0;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* Byte count of the push / argument-load sequence written below:
+          one push for TMP_REG1, one per saved register (at most three), plus
+          the argument moves (fastcall: 2 bytes each and 2 more for the
+          third, which is read from the stack; otherwise 2 bytes to copy esp
+          and 3 bytes per argument). */
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
+#else
+       size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (2 + args * 3) : 0);
+#endif
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+       FAIL_IF(!inst);
+
+       INC_SIZE(size);
+       PUSH_REG(reg_map[TMP_REG1]);
+#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       /* Keep a copy of esp in TMP_REG1; the arguments are addressed through it below. */
+       if (args > 0) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
+       }
+#endif
+       if (saveds > 2)
+               PUSH_REG(reg_map[SLJIT_SAVED_REG3]);
+       if (saveds > 1)
+               PUSH_REG(reg_map[SLJIT_SAVED_REG2]);
+       if (saveds > 0)
+               PUSH_REG(reg_map[SLJIT_SAVED_REG1]);
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       /* First two arguments are copied from scratch registers, the third is read from the stack. */
+       if (args > 0) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_SCRATCH_REG3];
+       }
+       if (args > 1) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_SCRATCH_REG2];
+       }
+       if (args > 2) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */;
+               *inst++ = 0x24;
+               *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
+       }
+#else
+       /* Arguments are read at word offsets 2, 3 and 4 from the esp copy in TMP_REG1. */
+       if (args > 0) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REG1];
+               *inst++ = sizeof(sljit_sw) * 2;
+       }
+       if (args > 1) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REG1];
+               *inst++ = sizeof(sljit_sw) * 3;
+       }
+       if (args > 2) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REG1];
+               *inst++ = sizeof(sljit_sw) * 4;
+       }
+#endif
+
+       /* Frame layout, from the bottom: a fixed area, then one word per
+          scratch register beyond three, then one word per saved register
+          beyond three, then the caller's locals. */
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       locals_offset = 2 * sizeof(sljit_uw);
+#else
+       SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= 2 * sizeof(sljit_uw), require_at_least_two_words);
+       locals_offset = FIXED_LOCALS_OFFSET;
+#endif
+       compiler->scratches_start = locals_offset;
+       if (scratches > 3)
+               locals_offset += (scratches - 3) * sizeof(sljit_uw);
+       compiler->saveds_start = locals_offset;
+       if (saveds > 3)
+               locals_offset += (saveds - 3) * sizeof(sljit_uw);
+       compiler->locals_offset = locals_offset;
+#if defined(__APPLE__)
+       /* `saveds` is reused here as a byte count: two words plus one per
+          pushed saved register. The frame is sized so that this count plus
+          the frame is a multiple of 16.
+          NOTE(review): presumably for 16-byte stack alignment - confirm. */
+       saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+       local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
+#else
+       /* Round the locals up to a whole number of words. */
+       local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
+#endif
+
+       compiler->local_size = local_size;
+#ifdef _WIN32
+       /* Frames above 1024 bytes call sljit_grow_stack with the size in SLJIT_SCRATCH_REG1 first. */
+       if (local_size > 1024) {
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+               FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size));
+#else
+               local_size -= FIXED_LOCALS_OFFSET;
+               FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size));
+               FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+                       SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET));
+#endif
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+       }
+#endif
+
+       SLJIT_ASSERT(local_size > 0);
+       return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+               SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size);
+}
+
+/* Records a function context in `compiler` without emitting any code.
+   It stores the register counts and computes scratches_start, saveds_start,
+   locals_offset and local_size with the same formulas that
+   sljit_emit_enter() uses.
+
+   args, scratches, saveds, local_size: same meaning as for
+   sljit_emit_enter(). */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si locals_offset;
+
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+       compiler->args = args;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       locals_offset = 2 * sizeof(sljit_uw);
+#else
+       locals_offset = FIXED_LOCALS_OFFSET;
+#endif
+       /* One stack word for every scratch and saved register beyond the first three. */
+       compiler->scratches_start = locals_offset;
+       if (scratches > 3)
+               locals_offset += (scratches - 3) * sizeof(sljit_uw);
+       compiler->saveds_start = locals_offset;
+       if (saveds > 3)
+               locals_offset += (saveds - 3) * sizeof(sljit_uw);
+       compiler->locals_offset = locals_offset;
+#if defined(__APPLE__)
+       /* `saveds` is reused as a byte count (two words plus the pushed saved
+          registers); that count plus the frame is rounded to a multiple of 16. */
+       saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+       compiler->local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
+#else
+       /* Round the locals up to a whole number of words. */
+       compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
+#endif
+}
+
+/* Emits the function epilogue: moves the return value (handled by
+   emit_mov_before_return), adds compiler->local_size back to
+   SLJIT_LOCALS_REG, pops the saved registers and TMP_REG1 in the reverse
+   of the order sljit_emit_enter() pushed them, and returns.
+
+   op, src, srcw: describe the value to return; forwarded unchanged to
+                  emit_mov_before_return().
+
+   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF() on failure. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si size;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+       SLJIT_ASSERT(compiler->args >= 0);
+
+       compiler->flags_saved = 0;
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       SLJIT_ASSERT(compiler->local_size > 0);
+       FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+               SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size));
+
+       /* Two bytes plus one per popped saved register (at most three); two
+          more bytes are reserved depending on the argument count.
+          NOTE(review): in the non-fastcall build the extra two bytes are
+          reserved although only a plain RET() is emitted - confirm intent. */
+       size = 2 + (compiler->saveds <= 3 ? compiler->saveds : 3);
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       if (compiler->args > 2)
+               size += 2;
+#else
+       if (compiler->args > 0)
+               size += 2;
+#endif
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+       FAIL_IF(!inst);
+
+       INC_SIZE(size);
+
+       if (compiler->saveds > 0)
+               POP_REG(reg_map[SLJIT_SAVED_REG1]);
+       if (compiler->saveds > 1)
+               POP_REG(reg_map[SLJIT_SAVED_REG2]);
+       if (compiler->saveds > 2)
+               POP_REG(reg_map[SLJIT_SAVED_REG3]);
+       POP_REG(reg_map[TMP_REG1]);
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       /* With three arguments one word is removed from the stack on return. */
+       if (compiler->args > 2)
+               RET_I16(sizeof(sljit_sw));
+       else
+               RET();
+#else
+       RET();
+#endif
+
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* Reserves space for one x86 instruction and encodes everything except the
+   opcode bytes: optional prefixes, the mod r/m byte, an optional SIB byte,
+   the displacement and the immediate.
+
+   size: the low four bits give the number of opcode bytes; the remaining
+         bits carry EX86_* flags (size contains the flags as well).
+   a, imma: the register or immediate operand.
+   b, immb: the general operand (register or memory, never an immediate).
+
+   Returns a pointer to the first opcode byte, which the caller fills in.
+   For EX86_SHIFT_INS the opcode is written here and the returned pointer is
+   one byte further on.
+   NOTE(review): PTR_FAIL_IF() presumably returns NULL when ensure_buf()
+   fails - confirm against the macro definition. */
+static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size,
+       /* The register or immediate operand. */
+       sljit_si a, sljit_sw imma,
+       /* The general operand (not immediate). */
+       sljit_si b, sljit_sw immb)
+{
+       sljit_ub *inst;
+       sljit_ub *buf_ptr;
+       sljit_si flags = size & ~0xf;
+       sljit_si inst_size;
+
+       /* Both cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
+       /* Size flags not allowed for typed instructions. */
+       SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
+       /* Both size flags cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+       /* SSE2 and immediate is not possible. */
+       SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
+       SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
+               && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
+               && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+#endif
+
+       /* First pass: compute the total instruction length in inst_size. */
+       size &= 0xf;
+       inst_size = size;
+
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+       if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
+               inst_size++;
+#endif
+       if (flags & EX86_PREF_66)
+               inst_size++;
+
+       /* Calculate size of b. */
+       inst_size += 1; /* mod r/m byte. */
+       if (b & SLJIT_MEM) {
+               if ((b & REG_MASK) == SLJIT_UNUSED)
+                       inst_size += sizeof(sljit_sw);
+               else if (immb != 0 && !(b & OFFS_REG_MASK)) {
+                       /* Immediate operand. */
+                       if (immb <= 127 && immb >= -128)
+                               inst_size += sizeof(sljit_sb);
+                       else
+                               inst_size += sizeof(sljit_sw);
+               }
+
+               /* A SLJIT_LOCALS_REG base without an offset register is rewritten
+                  to carry SLJIT_LOCALS_REG as offset register too, which makes
+                  the encoding below go through a SIB byte. */
+               if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK))
+                       b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
+
+               if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
+                       inst_size += 1; /* SIB byte. */
+       }
+
+       /* Calculate size of a. */
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BIN_INS) {
+                       /* Immediates that fit a signed byte use the short form. */
+                       if (imma <= 127 && imma >= -128) {
+                               inst_size += 1;
+                               flags |= EX86_BYTE_ARG;
+                       } else
+                               inst_size += 4;
+               }
+               else if (flags & EX86_SHIFT_INS) {
+                       /* Shift counts are masked to five bits; a count of one needs no immediate byte. */
+                       imma &= 0x1f;
+                       if (imma != 1) {
+                               inst_size ++;
+                               flags |= EX86_BYTE_ARG;
+                       }
+               } else if (flags & EX86_BYTE_ARG)
+                       inst_size++;
+               else if (flags & EX86_HALF_ARG)
+                       inst_size += sizeof(short);
+               else
+                       inst_size += sizeof(sljit_sw);
+       }
+       else
+               SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
+       PTR_FAIL_IF(!inst);
+
+       /* Encoding the byte. */
+       INC_SIZE(inst_size);
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+       if (flags & EX86_PREF_F2)
+               *inst++ = 0xf2;
+       if (flags & EX86_PREF_F3)
+               *inst++ = 0xf3;
+#endif
+       if (flags & EX86_PREF_66)
+               *inst++ = 0x66;
+
+       /* inst now points at the opcode bytes, buf_ptr at the mod r/m byte after them. */
+       buf_ptr = inst + size;
+
+       /* Encode mod/rm byte. */
+       if (!(flags & EX86_SHIFT_INS)) {
+               if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+                       *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
+
+               if ((a & SLJIT_IMM) || (a == 0))
+                       *buf_ptr = 0;
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+               else if (!(flags & EX86_SSE2))
+                       *buf_ptr = reg_map[a] << 3;
+               else
+                       *buf_ptr = a << 3;
+#else
+               else
+                       *buf_ptr = reg_map[a] << 3;
+#endif
+       }
+       else {
+               /* Shift opcode depends on where the count comes from: 1, an immediate, or a register. */
+               if (a & SLJIT_IMM) {
+                       if (imma == 1)
+                               *inst = GROUP_SHIFT_1;
+                       else
+                               *inst = GROUP_SHIFT_N;
+               } else
+                       *inst = GROUP_SHIFT_CL;
+               *buf_ptr = 0;
+       }
+
+       /* Second operand: register, base (+ offset register) memory, or absolute address. */
+       if (!(b & SLJIT_MEM))
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+               *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_map[b] : b);
+#else
+               *buf_ptr++ |= MOD_REG + reg_map[b];
+#endif
+       else if ((b & REG_MASK) != SLJIT_UNUSED) {
+               if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
+                       /* 0x40 selects an 8-bit displacement, 0x80 a 32-bit one. */
+                       if (immb != 0) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr |= 0x40;
+                               else
+                                       *buf_ptr |= 0x80;
+                       }
+
+                       if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
+                               *buf_ptr++ |= reg_map[b & REG_MASK];
+                       else {
+                               *buf_ptr++ |= 0x04;
+                               *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
+                       }
+
+                       if (immb != 0) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr++ = immb; /* 8 bit displacement. */
+                               else {
+                                       *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+                                       buf_ptr += sizeof(sljit_sw);
+                               }
+                       }
+               }
+               else {
+                       /* Base plus offset register: immb goes into the top two bits of the SIB byte. */
+                       *buf_ptr++ |= 0x04;
+                       *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
+               }
+       }
+       else {
+               *buf_ptr++ |= 0x05;
+               *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+               buf_ptr += sizeof(sljit_sw);
+       }
+
+       /* Trailing immediate; a shift by one has none (neither size flag is set for it). */
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BYTE_ARG)
+                       *buf_ptr = imma;
+               else if (flags & EX86_HALF_ARG)
+                       *(short*)buf_ptr = imma;
+               else if (!(flags & EX86_SHIFT_INS))
+                       *(sljit_sw*)buf_ptr = imma;
+       }
+
+       return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Call / return instructions                                           */
+/* --------------------------------------------------------------------- */
+
+/* Emits the argument set-up that precedes a call of type SLJIT_CALL1..3.
+
+   Fastcall build: pushes SLJIT_SCRATCH_REG3 for three-argument calls, then
+   copies SLJIT_SCRATCH_REG1 into SLJIT_SCRATCH_REG3.
+   Otherwise: stores SLJIT_SCRATCH_REG1..3 (as many as the call type needs)
+   to word offsets 0, 1 and 2 from SLJIT_LOCALS_REG.
+
+   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF() when no buffer
+   is available. */
+static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type)
+{
+       sljit_ub *inst;
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       inst = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
+       FAIL_IF(!inst);
+       INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);
+
+       if (type >= SLJIT_CALL3)
+               PUSH_REG(reg_map[SLJIT_SCRATCH_REG3]);
+       *inst++ = MOV_r_rm;
+       *inst++ = MOD_REG | (reg_map[SLJIT_SCRATCH_REG3] << 3) | reg_map[SLJIT_SCRATCH_REG1];
+#else
+       /* Four bytes per stored argument: opcode, mod r/m, SIB, 8-bit displacement. */
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
+       FAIL_IF(!inst);
+       INC_SIZE(4 * (type - SLJIT_CALL0));
+
+       *inst++ = MOV_rm_r;
+       *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG1] << 3) | 0x4 /* SIB */;
+       *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG];
+       *inst++ = 0;
+       if (type >= SLJIT_CALL2) {
+               *inst++ = MOV_rm_r;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG2] << 3) | 0x4 /* SIB */;
+               *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG];
+               *inst++ = sizeof(sljit_sw);
+       }
+       if (type >= SLJIT_CALL3) {
+               *inst++ = MOV_rm_r;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG3] << 3) | 0x4 /* SIB */;
+               *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG];
+               *inst++ = 2 * sizeof(sljit_sw);
+       }
+#endif
+       return SLJIT_SUCCESS;
+}
+
+/* Emits the entry of a fast-called routine: pops one word from the stack
+   into dst.
+   NOTE(review): the popped word is presumably the return address pushed by
+   the fast call - confirm against the SLJIT_FAST_CALL emitter.
+
+   dst, dstw: destination operand; SLJIT_UNUSED is replaced by TMP_REG1.
+
+   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF() on failure. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
+       if (FAST_IS_REG(dst)) {
+               /* Unused dest is possible here. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+
+               INC_SIZE(1);
+               POP_REG(reg_map[dst]);
+               return SLJIT_SUCCESS;
+       }
+
+       /* Memory. */
+       /* emit_x86_instruction() encodes the operand; only the opcode byte is filled in here. */
+       inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+       FAIL_IF(!inst);
+       *inst++ = POP_rm;
+       return SLJIT_SUCCESS;
+}
+
+/* Emits the return of a fast-called routine: pushes src onto the stack and
+   then emits RET(), so execution continues at the address held in src.
+
+   src, srcw: the return address operand - a register, a memory operand, or
+              an immediate (srcw).
+
+   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF() on failure. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+       if (FAST_IS_REG(src)) {
+               /* One byte for the push, one for the RET() emitted at the end. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
+               FAIL_IF(!inst);
+
+               INC_SIZE(1 + 1);
+               PUSH_REG(reg_map[src]);
+       }
+       else if (src & SLJIT_MEM) {
+               /* GROUP_FF opcode; PUSH_rm is or-ed into the mod r/m byte that follows it. */
+               inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_FF;
+               *inst |= PUSH_rm;
+
+               /* Separate one-byte reservation for the RET() emitted at the end. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+       }
+       else {
+               /* SLJIT_IMM. */
+               /* Five bytes for the push (opcode + 32-bit immediate), one for the RET(). */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
+               FAIL_IF(!inst);
+
+               INC_SIZE(5 + 1);
+               *inst++ = PUSH_i32;
+               *(sljit_sw*)inst = srcw;
+               inst += sizeof(sljit_sw);
+       }
+
+       RET();
+       return SLJIT_SUCCESS;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeX86_64.c b/ext/pcre/pcrelib/sljit/sljitNativeX86_64.c
new file mode 100644 (file)
index 0000000..967f3c3
--- /dev/null
@@ -0,0 +1,810 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* x86 64-bit arch dependent functions. */
+
+/*
+   Emits a ten byte "move 64 bit immediate into register" instruction:
+   a REX prefix, the opcode with the low three bits of the hardware
+   register number added, and the eight byte immediate.
+
+   compiler: code buffer owner.
+   reg:      sljit register index; translated through reg_map.
+   imm:      value to load.
+
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF() when no buffer
+   space could be obtained.
+*/
+static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       sljit_ub *inst;
+       sljit_si hw_reg = reg_map[reg];
+       sljit_ub rex_prefix = REX_W;
+
+       /* Hardware registers 8 and above are only reachable with REX_B. */
+       if (hw_reg >= 8)
+               rex_prefix |= REX_B;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
+       FAIL_IF(!inst);
+       INC_SIZE(2 + sizeof(sljit_sw));
+
+       inst[0] = rex_prefix;
+       inst[1] = MOV_r_i32 + (hw_reg & 0x7);
+       *(sljit_sw*)(inst + 2) = imm;
+       return SLJIT_SUCCESS;
+}
+/*
+   x86-64: writes a far (64 bit absolute) jump or call at code_ptr and
+   returns the position just past it. The target is loaded into the
+   register with hardware index 9 (TMP_REG3, checked by the compile time
+   assert) using a ten byte move of a 64 bit immediate, followed by a three
+   byte indirect jmp/call through that register.
+   For conditional types (type < SLJIT_JUMP) a two byte conditional jump
+   with the inverted condition is written first, so the 10 + 3 byte
+   sequence is skipped when the original condition does not hold.
+   jump->addr is set to the address of the 64 bit immediate.
+*/
+static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type)
+{
+       if (type < SLJIT_JUMP) {
+               /* Invert type. */
+               *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; /* presumably turns the near form opcode into its short (8 bit offset) form; get_jump_code() is defined elsewhere */
+               *code_ptr++ = 10 + 3; /* offset: size of the move + indirect jump written below */
+       }
+
+       SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
+       *code_ptr++ = REX_W | REX_B;
+       *code_ptr++ = MOV_r_i32 + 1; /* low three bits of hardware register 9 */
+       jump->addr = (sljit_uw)code_ptr;
+
+       if (jump->flags & JUMP_LABEL)
+               jump->flags |= PATCH_MD; /* immediate left unwritten here; presumably patched once the label address is known */
+       else
+               *(sljit_sw*)code_ptr = jump->u.target;
+
+       code_ptr += sizeof(sljit_sw);
+       *code_ptr++ = REX_B;
+       *code_ptr++ = GROUP_FF;
+       *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
+
+       return code_ptr;
+}
+
+/*
+   x86-64: writes a jump (or a call when type == 2) to the fixed address
+   `addr` at code_ptr and returns the position just past the emitted bytes.
+
+   When the target is reachable with a signed 32 bit displacement the five
+   byte relative form is used: one opcode byte followed by a four byte
+   displacement measured from the end of the instruction. Otherwise the
+   address is loaded into the register with hardware index 9 (TMP_REG3,
+   checked by the compile time assert) and an indirect jmp/call through
+   that register is emitted (10 + 3 bytes).
+*/
+static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type)
+{
+       sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si));
+
+       if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
+               *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
+               /* The displacement is exactly four bytes. Store it as sljit_si
+                  (a sljit_sw store would write eight bytes) and step over it,
+                  so the returned pointer is past this instruction as it is in
+                  the far case below. */
+               *(sljit_si*)code_ptr = (sljit_si)delta;
+               code_ptr += sizeof(sljit_si);
+       }
+       else {
+               SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
+               *code_ptr++ = REX_W | REX_B;
+               *code_ptr++ = MOV_r_i32 + 1;
+               *(sljit_sw*)code_ptr = addr;
+               code_ptr += sizeof(sljit_sw);
+               *code_ptr++ = REX_B;
+               *code_ptr++ = GROUP_FF;
+               *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
+       }
+
+       return code_ptr;
+}
+/*
+   x86-64 function prologue. Records `scratches` and `saveds` in the
+   compiler, pushes the saved registers that are in use (plus
+   SLJIT_TEMPORARY_EREG2 on _WIN64 when scratches >= 5), copies up to three
+   incoming arguments from the ABI argument registers named in the comments
+   below into SLJIT_SAVED_REG1..3, and subtracts the local area from
+   register 4 (the stack pointer in the x86 encoding). The local area is
+   sized so that locals + FIXED_LOCALS_OFFSET + pushed words is a multiple
+   of 16. On _WIN64 sljit_grow_stack is additionally called for local areas
+   above 1024 bytes, and xmm6 is stored in the frame.
+   Returns SLJIT_SUCCESS; FAIL_IF() leaves early on failure.
+*/
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si size, pushed_size;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+       compiler->flags_saved = 0;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       size = saveds; /* byte count of the pushes and argument moves emitted below; every push takes at least one byte */
+       /* Including the return address saved by the call instruction. */
+       pushed_size = (saveds + 1) * sizeof(sljit_sw);
+#ifndef _WIN64
+       if (saveds >= 2)
+               size += saveds - 1; /* one REX_B prefix byte for each of SAVED_REG2 and above */
+#else
+       if (saveds >= 4)
+               size += saveds - 3; /* only SAVED_EREG1/EREG2 take a REX_B prefix on this ABI */
+       if (scratches >= 5) {
+               size += (5 - 4) * 2; /* REX_B + push of SLJIT_TEMPORARY_EREG2 */
+               pushed_size += sizeof(sljit_sw);
+       }
+#endif
+       size += args * 3; /* each argument move below is three bytes */
+       if (size > 0) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+
+               INC_SIZE(size);
+               if (saveds >= 5) {
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
+                       *inst++ = REX_B;
+                       PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
+               }
+               if (saveds >= 4) {
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
+                       *inst++ = REX_B;
+                       PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
+               }
+               if (saveds >= 3) {
+#ifndef _WIN64
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
+                       *inst++ = REX_B;
+#else
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
+#endif
+                       PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
+               }
+               if (saveds >= 2) {
+#ifndef _WIN64
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
+                       *inst++ = REX_B;
+#else
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
+#endif
+                       PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
+               }
+               if (saveds >= 1) {
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
+                       PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
+               }
+#ifdef _WIN64
+               if (scratches >= 5) {
+                       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
+                       *inst++ = REX_B;
+                       PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
+               }
+#endif
+
+#ifndef _WIN64
+               if (args > 0) {
+                       *inst++ = REX_W;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */;
+               }
+               if (args > 1) {
+                       *inst++ = REX_W | REX_R;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */;
+               }
+               if (args > 2) {
+                       *inst++ = REX_W | REX_R;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */;
+               }
+#else
+               if (args > 0) {
+                       *inst++ = REX_W;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */;
+               }
+               if (args > 1) {
+                       *inst++ = REX_W;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */;
+               }
+               if (args > 2) {
+                       *inst++ = REX_W | REX_B;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */;
+               }
+#endif
+       }
+
+       local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; /* round the whole frame up to a multiple of 16, then take the already pushed words out again */
+       compiler->local_size = local_size;
+#ifdef _WIN64
+       if (local_size > 1024) {
+               /* Allocate stack for the callback, which grows the stack. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si)));
+               FAIL_IF(!inst);
+               INC_SIZE(4 + (3 + sizeof(sljit_si)));
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_83;
+               *inst++ = MOD_REG | SUB | 4;
+               /* Pushed size must be divisible by 8. */
+               SLJIT_ASSERT(!(pushed_size & 0x7));
+               if (pushed_size & 0x8) { /* odd number of pushed words: reserve five words instead of four, presumably to keep the stack 16 byte aligned for the call below */
+                       *inst++ = 5 * sizeof(sljit_sw);
+                       local_size -= 5 * sizeof(sljit_sw);
+               } else {
+                       *inst++ = 4 * sizeof(sljit_sw);
+                       local_size -= 4 * sizeof(sljit_sw);
+               }
+               /* Second instruction */
+               SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg);
+               *inst++ = REX_W;
+               *inst++ = MOV_rm_i32;
+               *inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1];
+               *(sljit_si*)inst = local_size; /* remaining size goes into SLJIT_SCRATCH_REG1, presumably as the argument of sljit_grow_stack */
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+               compiler->skip_checks = 1;
+#endif
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+       }
+#endif
+       SLJIT_ASSERT(local_size > 0);
+       if (local_size <= 127) { /* fits the one byte immediate form */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+               FAIL_IF(!inst);
+               INC_SIZE(4);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_83;
+               *inst++ = MOD_REG | SUB | 4;
+               *inst++ = local_size;
+       }
+       else { /* four byte immediate form */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
+               FAIL_IF(!inst);
+               INC_SIZE(7);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_81;
+               *inst++ = MOD_REG | SUB | 4;
+               *(sljit_si*)inst = local_size;
+               inst += sizeof(sljit_si);
+       }
+#ifdef _WIN64
+       /* Save xmm6 with MOVAPS instruction. */
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+       FAIL_IF(!inst);
+       INC_SIZE(5);
+       *inst++ = GROUP_0F;
+       *(sljit_si*)inst = 0x20247429; /* remaining four instruction bytes, stored as one 32 bit word: 29 74 24 20 in memory on a little-endian target */
+#endif
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+{
+       sljit_si pushed_size;
+
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->logical_local_size = local_size;
+#endif
+
+       /* Including the return address saved by the call instruction. */
+       pushed_size = (saveds + 1) * sizeof(sljit_sw);
+#ifdef _WIN64
+       if (scratches >= 5)
+               pushed_size += sizeof(sljit_sw);
+#endif
+       compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
+}
+/*
+   x86-64 function epilogue. Moves the return value into place through
+   emit_mov_before_return(), restores xmm6 on _WIN64, adds
+   compiler->local_size back to register 4 (the stack pointer in the x86
+   encoding), pops the registers pushed by sljit_emit_enter in the reverse
+   order and finally emits RET().
+   Returns SLJIT_SUCCESS; FAIL_IF() leaves early on failure.
+*/
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si size;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       compiler->flags_saved = 0;
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+#ifdef _WIN64
+       /* Restore xmm6 with MOVAPS instruction. */
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+       FAIL_IF(!inst);
+       INC_SIZE(5);
+       *inst++ = GROUP_0F;
+       *(sljit_si*)inst = 0x20247428; /* remaining four instruction bytes stored as one 32 bit word; differs from the save in sljit_emit_enter only in the opcode byte (28 instead of 29) */
+#endif
+       SLJIT_ASSERT(compiler->local_size > 0);
+       if (compiler->local_size <= 127) { /* fits the one byte immediate form */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+               FAIL_IF(!inst);
+               INC_SIZE(4);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_83;
+               *inst++ = MOD_REG | ADD | 4;
+               *inst = compiler->local_size;
+       }
+       else { /* four byte immediate form */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
+               FAIL_IF(!inst);
+               INC_SIZE(7);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_81;
+               *inst++ = MOD_REG | ADD | 4;
+               *(sljit_si*)inst = compiler->local_size;
+       }
+
+       size = 1 + compiler->saveds; /* one byte for RET() plus at least one byte per pop */
+#ifndef _WIN64
+       if (compiler->saveds >= 2)
+               size += compiler->saveds - 1; /* REX_B prefix bytes, mirroring sljit_emit_enter */
+#else
+       if (compiler->saveds >= 4)
+               size += compiler->saveds - 3; /* REX_B prefix bytes, mirroring sljit_emit_enter */
+       if (compiler->scratches >= 5)
+               size += (5 - 4) * 2; /* REX_B + pop of SLJIT_TEMPORARY_EREG2 */
+#endif
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+       FAIL_IF(!inst);
+
+       INC_SIZE(size);
+
+#ifdef _WIN64
+       if (compiler->scratches >= 5) {
+               *inst++ = REX_B;
+               POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
+       }
+#endif
+       if (compiler->saveds >= 1)
+               POP_REG(reg_map[SLJIT_SAVED_REG1]);
+       if (compiler->saveds >= 2) {
+#ifndef _WIN64
+               *inst++ = REX_B;
+#endif
+               POP_REG(reg_lmap[SLJIT_SAVED_REG2]);
+       }
+       if (compiler->saveds >= 3) {
+#ifndef _WIN64
+               *inst++ = REX_B;
+#endif
+               POP_REG(reg_lmap[SLJIT_SAVED_REG3]);
+       }
+       if (compiler->saveds >= 4) {
+               *inst++ = REX_B;
+               POP_REG(reg_lmap[SLJIT_SAVED_EREG1]);
+       }
+       if (compiler->saveds >= 5) {
+               *inst++ = REX_B;
+               POP_REG(reg_lmap[SLJIT_SAVED_EREG2]);
+       }
+
+       RET();
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/*
+   Emits a single instruction that consists of an optional REX prefix, a
+   one byte opcode and a 32 bit immediate.
+
+   rex:    prefix byte, or 0 when no prefix is wanted.
+   opcode: the opcode byte.
+   imm:    immediate value; stored as a 32 bit word.
+
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF() when no buffer
+   space could be obtained.
+*/
+static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm)
+{
+       sljit_ub *inst;
+       /* Opcode byte plus the four byte immediate. */
+       sljit_si total = 1 + sizeof(sljit_si);
+
+       if (rex != 0)
+               total++;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + total);
+       FAIL_IF(!inst);
+       INC_SIZE(total);
+
+       if (rex != 0) {
+               *inst = rex;
+               inst++;
+       }
+       *inst = opcode;
+       *(sljit_si*)(inst + 1) = imm;
+       return SLJIT_SUCCESS;
+}
+/*
+   x86-64 instruction encoder shared by the emitters. Reserves buffer space
+   for one instruction and writes everything around the opcode: optional
+   F2/F3/66 prefixes, the REX prefix, the mod r/m byte, an optional SIB
+   byte, the displacement and the immediate.
+
+   size:    low four bits give the number of opcode bytes; the remaining
+            bits carry EX86_* flags.
+   a, imma: register operand, or SLJIT_IMM with its value in imma.
+   b, immb: general operand (register or SLJIT_MEM form). With an index
+            register other than the locals register immb is placed in the
+            SIB scale field; otherwise it is the displacement.
+
+   Returns a pointer to the opcode position (for EX86_SHIFT_INS the byte
+   after it, since the shift opcode is written here), or NULL on failure.
+   For EX86_BIN_INS with an immediate the group opcode is also stored at
+   the returned position. A displacement that does not fit in 32 bits is
+   first loaded into TMP_REG3.
+*/
+static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size,
+       /* The register or immediate operand. */
+       sljit_si a, sljit_sw imma,
+       /* The general operand (not immediate). */
+       sljit_si b, sljit_sw immb)
+{
+       sljit_ub *inst;
+       sljit_ub *buf_ptr;
+       sljit_ub rex = 0;
+       sljit_si flags = size & ~0xf; /* everything above the low four bits is an EX86_* flag */
+       sljit_si inst_size;
+
+       /* The immediate operand must be 32 bit. */
+       SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
+       /* Both cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
+       /* Size flags not allowed for typed instructions. */
+       SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
+       /* Both size flags cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+       /* SSE2 and immediate is not possible. */
+       SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
+       SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
+               && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
+               && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+#endif
+
+       size &= 0xf; /* number of opcode bytes */
+       inst_size = size;
+
+       if ((b & SLJIT_MEM) && !(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { /* displacement too large for 32 bits: put it in TMP_REG3 and address through that register */
+               if (emit_load_imm64(compiler, TMP_REG3, immb))
+                       return NULL;
+               immb = 0;
+               if (b & REG_MASK)
+                       b |= TO_OFFS_REG(TMP_REG3); /* base register present: TMP_REG3 becomes the index */
+               else
+                       b |= TMP_REG3; /* no base register: TMP_REG3 becomes the base */
+       }
+
+       if (!compiler->mode32 && !(flags & EX86_NO_REXW))
+               rex |= REX_W;
+       else if (flags & EX86_REX)
+               rex |= REX;
+
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+       if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
+               inst_size++;
+#endif
+       if (flags & EX86_PREF_66)
+               inst_size++;
+
+       /* Calculate size of b. */
+       inst_size += 1; /* mod r/m byte. */
+       if (b & SLJIT_MEM) {
+               if ((b & REG_MASK) == SLJIT_UNUSED)
+                       inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */
+               else {
+                       if (reg_map[b & REG_MASK] >= 8)
+                               rex |= REX_B;
+                       if (immb != 0 && !(b & OFFS_REG_MASK)) {
+                               /* Immediate operand. */
+                               if (immb <= 127 && immb >= -128)
+                                       inst_size += sizeof(sljit_sb);
+                               else
+                                       inst_size += sizeof(sljit_si);
+                       }
+               }
+
+               if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK))
+                       b |= TO_OFFS_REG(SLJIT_LOCALS_REG); /* forces the SIB form below; presumably because the locals register maps to the stack pointer, which needs a SIB byte as base */
+
+               if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
+                       inst_size += 1; /* SIB byte. */
+                       if (reg_map[OFFS_REG(b)] >= 8)
+                               rex |= REX_X;
+               }
+       }
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+       else if (!(flags & EX86_SSE2) && reg_map[b] >= 8)
+               rex |= REX_B;
+#else
+       else if (reg_map[b] >= 8)
+               rex |= REX_B;
+#endif
+
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BIN_INS) {
+                       if (imma <= 127 && imma >= -128) { /* short form with a one byte immediate */
+                               inst_size += 1;
+                               flags |= EX86_BYTE_ARG;
+                       } else
+                               inst_size += 4;
+               }
+               else if (flags & EX86_SHIFT_INS) {
+                       imma &= compiler->mode32 ? 0x1f : 0x3f; /* keep only the count bits that matter for the operand width */
+                       if (imma != 1) { /* a count of one uses GROUP_SHIFT_1 below, which takes no immediate byte */
+                               inst_size ++;
+                               flags |= EX86_BYTE_ARG;
+                       }
+               } else if (flags & EX86_BYTE_ARG)
+                       inst_size++;
+               else if (flags & EX86_HALF_ARG)
+                       inst_size += sizeof(short);
+               else
+                       inst_size += sizeof(sljit_si);
+       }
+       else {
+               SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
+               /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+               if (!(flags & EX86_SSE2) && reg_map[a] >= 8)
+                       rex |= REX_R;
+#else
+               if (reg_map[a] >= 8)
+                       rex |= REX_R;
+#endif
+       }
+
+       if (rex)
+               inst_size++;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
+       PTR_FAIL_IF(!inst);
+
+       /* Encoding the byte. */
+       INC_SIZE(inst_size);
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+       if (flags & EX86_PREF_F2)
+               *inst++ = 0xf2;
+       if (flags & EX86_PREF_F3)
+               *inst++ = 0xf3;
+#endif
+       if (flags & EX86_PREF_66)
+               *inst++ = 0x66;
+       if (rex)
+               *inst++ = rex;
+       buf_ptr = inst + size; /* the mod r/m byte follows the `size` opcode bytes */
+
+       /* Encode mod/rm byte. */
+       if (!(flags & EX86_SHIFT_INS)) {
+               if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+                       *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
+
+               if ((a & SLJIT_IMM) || (a == 0))
+                       *buf_ptr = 0;
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+               else if (!(flags & EX86_SSE2))
+                       *buf_ptr = reg_lmap[a] << 3;
+               else
+                       *buf_ptr = a << 3; /* SSE2 operands are used as raw register numbers, without reg_lmap */
+#else
+               else
+                       *buf_ptr = reg_lmap[a] << 3;
+#endif
+       }
+       else {
+               if (a & SLJIT_IMM) {
+                       if (imma == 1)
+                               *inst = GROUP_SHIFT_1;
+                       else
+                               *inst = GROUP_SHIFT_N;
+               } else
+                       *inst = GROUP_SHIFT_CL;
+               *buf_ptr = 0;
+       }
+
+       if (!(b & SLJIT_MEM))
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+               *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b);
+#else
+               *buf_ptr++ |= MOD_REG + reg_lmap[b];
+#endif
+       else if ((b & REG_MASK) != SLJIT_UNUSED) {
+               if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) { /* base + displacement forms */
+                       if (immb != 0) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr |= 0x40;
+                               else
+                                       *buf_ptr |= 0x80;
+                       }
+
+                       if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
+                               *buf_ptr++ |= reg_lmap[b & REG_MASK];
+                       else {
+                               *buf_ptr++ |= 0x04; /* r/m value 4 announces a SIB byte */
+                               *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
+                       }
+
+                       if (immb != 0) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr++ = immb; /* 8 bit displacement. */
+                               else {
+                                       *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
+                                       buf_ptr += sizeof(sljit_si);
+                               }
+                       }
+               }
+               else { /* base + index form: no displacement is written */
+                       *buf_ptr++ |= 0x04;
+                       *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); /* immb lands in the SIB scale field (bits 6-7) */
+               }
+       }
+       else { /* no base register: absolute 32 bit address through a SIB byte */
+               *buf_ptr++ |= 0x04;
+               *buf_ptr++ = 0x25;
+               *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
+               buf_ptr += sizeof(sljit_si);
+       }
+
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BYTE_ARG)
+                       *buf_ptr = imma;
+               else if (flags & EX86_HALF_ARG)
+                       *(short*)buf_ptr = imma;
+               else if (!(flags & EX86_SHIFT_INS))
+                       *(sljit_si*)buf_ptr = imma;
+       }
+
+       return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); /* shift opcode was written above, so point the caller at the byte after it */
+}
+
+/* --------------------------------------------------------------------- */
+/*  Call / return instructions                                           */
+/* --------------------------------------------------------------------- */
+
+/*
+   x86-64: emits the register moves that place call arguments into the
+   ABI argument registers named in the comments below. The first argument
+   is always copied from SLJIT_SCRATCH_REG1; the third is copied from
+   SLJIT_SCRATCH_REG3 for SLJIT_CALL3 and above. The second argument gets
+   no move: the compile time assert checks that SLJIT_SCRATCH_REG2 maps to
+   hardware register 6 (2 on _WIN64).
+
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF() when no buffer
+   space could be obtained.
+*/
+static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type)
+{
+       sljit_ub *inst;
+       /* One three byte move, plus a second one when a third argument is passed. */
+       sljit_si move_bytes = (type >= SLJIT_CALL3) ? 6 : 3;
+
+#ifndef _WIN64
+       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 6 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers);
+#else
+       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers);
+#endif
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + move_bytes);
+       FAIL_IF(!inst);
+       INC_SIZE(move_bytes);
+
+       if (move_bytes == 6) {
+               /* Third argument. */
+#ifndef _WIN64
+               inst[0] = REX_W;
+               inst[1] = MOV_r_rm;
+               inst[2] = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3];
+#else
+               inst[0] = REX_W | REX_R;
+               inst[1] = MOV_r_rm;
+               inst[2] = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3];
+#endif
+               inst += 3;
+       }
+
+       /* First argument. */
+       inst[0] = REX_W;
+       inst[1] = MOV_r_rm;
+#ifndef _WIN64
+       inst[2] = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1];
+#else
+       inst[2] = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1];
+#endif
+       return SLJIT_SUCCESS;
+}
+/*
+   x86-64 fast enter: emits a pop of the top stack word into dst (register
+   or memory operand). An unused dst is replaced by TMP_REG1 so that the
+   pop is still emitted.
+   Returns SLJIT_SUCCESS; FAIL_IF() leaves early on failure.
+*/
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
+       if (FAST_IS_REG(dst)) {
+               if (reg_map[dst] < 8) { /* low register: single byte pop */
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       POP_REG(reg_lmap[dst]);
+                       return SLJIT_SUCCESS;
+               }
+
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); /* hardware register 8 or above: REX_B prefix + pop */
+               FAIL_IF(!inst);
+               INC_SIZE(2);
+               *inst++ = REX_B;
+               POP_REG(reg_lmap[dst]);
+               return SLJIT_SUCCESS;
+       }
+
+       /* REX_W is not necessary (src is not immediate). */
+       compiler->mode32 = 1;
+       inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+       FAIL_IF(!inst);
+       *inst++ = POP_rm;
+       return SLJIT_SUCCESS;
+}
+/*
+   x86-64 fast return: emits code that pushes the address described by
+   src/srcw onto the machine stack and then emits RET() (macro defined
+   elsewhere; presumably a one byte `ret`, so execution would continue at
+   the pushed address). An immediate that does not fit in 32 bits is first
+   loaded into TMP_REG1 and pushed from there.
+   Returns SLJIT_SUCCESS; FAIL_IF() leaves early on failure.
+*/
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) { /* the immediate push below only carries 32 bits */
+               FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+       }
+
+       if (FAST_IS_REG(src)) {
+               if (reg_map[src] < 8) {
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); /* one byte push + one byte for RET() */
+                       FAIL_IF(!inst);
+
+                       INC_SIZE(1 + 1);
+                       PUSH_REG(reg_lmap[src]);
+               }
+               else {
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1); /* REX_B + push, then RET() */
+                       FAIL_IF(!inst);
+
+                       INC_SIZE(2 + 1);
+                       *inst++ = REX_B;
+                       PUSH_REG(reg_lmap[src]);
+               }
+       }
+       else if (src & SLJIT_MEM) {
+               /* REX_W is not necessary (src is not immediate). */
+               compiler->mode32 = 1;
+               inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_FF;
+               *inst |= PUSH_rm; /* selects the push operation inside the FF group via the mod r/m byte */
+
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); /* separate one byte record for the RET() below */
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+       }
+       else {
+               SLJIT_ASSERT(IS_HALFWORD(srcw));
+               /* SLJIT_IMM. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); /* opcode + four byte immediate, then RET() */
+               FAIL_IF(!inst);
+
+               INC_SIZE(5 + 1);
+               *inst++ = PUSH_i32;
+               *(sljit_si*)inst = srcw;
+               inst += sizeof(sljit_si);
+       }
+
+       RET();
+       return SLJIT_SUCCESS;
+}
+
+
+/* --------------------------------------------------------------------- */
+/*  Extend input                                                         */
+/* --------------------------------------------------------------------- */
+/*
+   x86-64: emits a move of a 32 bit value from src to dst.
+
+   sign: non-zero selects the sign extending forms (MOVSXD for register
+         and memory sources); zero selects a move emitted in 32 bit mode.
+   dst, dstw / src, srcw: destination and source operands.
+
+   Nothing is emitted when dst is unused and src is not a memory operand.
+   compiler->mode32 is cleared on entry and is 0 again on every
+   SLJIT_SUCCESS return; a FAIL_IF() exit may leave it set.
+*/
+static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si dst_r;
+
+       compiler->mode32 = 0;
+
+       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
+               return SLJIT_SUCCESS; /* Empty instruction. */
+
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) {
+                       if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { /* value can be written as a 32 bit immediate of a 64 bit move (mode32 is 0 here) */
+                               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
+                               FAIL_IF(!inst);
+                               *inst = MOV_rm_i32;
+                               return SLJIT_SUCCESS;
+                       }
+                       return emit_load_imm64(compiler, dst, srcw); /* unsigned value above 0x7fffffff: load the full 64 bit constant */
+               }
+               compiler->mode32 = 1; /* immediate to non-register destination: 32 bit store */
+               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_i32;
+               compiler->mode32 = 0;
+               return SLJIT_SUCCESS;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; /* TMP_REG1 is the staging register when dst is not a register */
+
+       if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
+               dst_r = src; /* register to memory: store straight from src, no load needed */
+       else {
+               if (sign) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
+                       FAIL_IF(!inst);
+                       *inst++ = MOVSXD_r_rm;
+               } else {
+                       compiler->mode32 = 1;
+                       FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
+                       compiler->mode32 = 0;
+               }
+       }
+
+       if (dst & SLJIT_MEM) {
+               compiler->mode32 = 1; /* only the low 32 bits are stored */
+               inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_r;
+               compiler->mode32 = 0;
+       }
+
+       return SLJIT_SUCCESS;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c b/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c
new file mode 100644 (file)
index 0000000..653705f
--- /dev/null
@@ -0,0 +1,2883 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{      /* Returns a static string naming the target platform. */
+       return "x86" SLJIT_CPUINFO; /* adjacent-literal concatenation; SLJIT_CPUINFO is presumably a string literal from the config headers - confirm */
+}
+
+/*
+   32b register indexes:
+     0 - EAX
+     1 - ECX
+     2 - EDX
+     3 - EBX
+     4 - none
+     5 - EBP
+     6 - ESI
+     7 - EDI
+*/
+
+/*
+   64b register indexes:
+     0 - RAX
+     1 - RCX
+     2 - RDX
+     3 - RBX
+     4 - none
+     5 - RBP
+     6 - RSI
+     7 - RDI
+     8 - R8   - From now on REX prefix is required
+     9 - R9
+    10 - R10
+    11 - R11
+    12 - R12
+    13 - R13
+    14 - R14
+    15 - R15
+*/
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
+/* Last register + 1. */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { /* indexed by SLJIT register number, yields the hardware register index */
+       0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
+};
+
+#define CHECK_EXTRA_REGS(p, w, do) /* rewrites an EREG operand (p, w) into a slot addressed via SLJIT_LOCALS_REG, then runs 'do' */ \
+       if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
+               w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
+               p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
+               do; \
+       } \
+       else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
+               w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
+               p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
+               do; \
+       }
+
+#else /* SLJIT_CONFIG_X86_32 */
+
+/* Last register + 1. */
+#define TMP_REG1       (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2       (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3       (SLJIT_NO_REGISTERS + 3)
+
+/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
+   Note: avoid using r12 and r13 for memory addressing;
+   therefore r12 is better for SAVED_EREG than SAVED_REG. */
+#ifndef _WIN64
+/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
+       0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
+};
+/* Low map: reg_map & 0x7 (the low three bits; callers add a REX bit when reg_map >= 8). */
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
+       0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
+};
+#else
+/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
+       0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
+};
+/* Low map: reg_map & 0x7 (the low three bits; callers add a REX bit when reg_map >= 8). */
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
+       0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
+};
+#endif
+
+#define REX_W          0x48
+#define REX_R          0x44
+#define REX_X          0x42
+#define REX_B          0x41
+#define REX            0x40
+
+#ifndef _WIN64
+#define HALFWORD_MAX 0x7fffffffl
+#define HALFWORD_MIN -0x80000000l
+#else
+#define HALFWORD_MAX 0x7fffffffll
+#define HALFWORD_MIN -0x80000000ll
+#endif
+
+#define IS_HALFWORD(x)         ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
+#define NOT_HALFWORD(x)                ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
+
+#define CHECK_EXTRA_REGS(p, w, do) /* expands to nothing in this configuration */
+
+#endif /* SLJIT_CONFIG_X86_32 */
+
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+#define TMP_FREG       (0)
+#endif
+
+/* Size flags for emit_x86_instruction: */
+#define EX86_BIN_INS           0x0010
+#define EX86_SHIFT_INS         0x0020
+#define EX86_REX               0x0040
+#define EX86_NO_REXW           0x0080
+#define EX86_BYTE_ARG          0x0100
+#define EX86_HALF_ARG          0x0200
+#define EX86_PREF_66           0x0400
+
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+#define EX86_SSE2              0x0800
+#define EX86_PREF_F2           0x1000
+#define EX86_PREF_F3           0x2000
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+#define ADD            (/* BINARY */ 0 << 3)
+#define ADD_EAX_i32    0x05
+#define ADD_r_rm       0x03
+#define ADD_rm_r       0x01
+#define ADDSD_x_xm     0x58
+#define ADC            (/* BINARY */ 2 << 3)
+#define ADC_EAX_i32    0x15
+#define ADC_r_rm       0x13
+#define ADC_rm_r       0x11
+#define AND            (/* BINARY */ 4 << 3)
+#define AND_EAX_i32    0x25
+#define AND_r_rm       0x23
+#define AND_rm_r       0x21
+#define ANDPD_x_xm     0x54
+#define BSR_r_rm       (/* GROUP_0F */ 0xbd)
+#define CALL_i32       0xe8
+#define CALL_rm                (/* GROUP_FF */ 2 << 3)
+#define CDQ            0x99
+#define CMOVNE_r_rm    (/* GROUP_0F */ 0x45)
+#define CMP            (/* BINARY */ 7 << 3)
+#define CMP_EAX_i32    0x3d
+#define CMP_r_rm       0x3b
+#define CMP_rm_r       0x39
+#define DIV            (/* GROUP_F7 */ 6 << 3)
+#define DIVSD_x_xm     0x5e
+#define INT3           0xcc
+#define IDIV           (/* GROUP_F7 */ 7 << 3)
+#define IMUL           (/* GROUP_F7 */ 5 << 3)
+#define IMUL_r_rm      (/* GROUP_0F */ 0xaf)
+#define IMUL_r_rm_i8   0x6b
+#define IMUL_r_rm_i32  0x69
+#define JE_i8          0x74
+#define JMP_i8         0xeb
+#define JMP_i32                0xe9
+#define JMP_rm         (/* GROUP_FF */ 4 << 3)
+#define LEA_r_m                0x8d
+#define MOV_r_rm       0x8b
+#define MOV_r_i32      0xb8
+#define MOV_rm_r       0x89
+#define MOV_rm_i32     0xc7
+#define MOV_rm8_i8     0xc6
+#define MOV_rm8_r8     0x88
+#define MOVSD_x_xm     0x10
+#define MOVSD_xm_x     0x11
+#define MOVSXD_r_rm    0x63
+#define MOVSX_r_rm8    (/* GROUP_0F */ 0xbe)
+#define MOVSX_r_rm16   (/* GROUP_0F */ 0xbf)
+#define MOVZX_r_rm8    (/* GROUP_0F */ 0xb6)
+#define MOVZX_r_rm16   (/* GROUP_0F */ 0xb7)
+#define MUL            (/* GROUP_F7 */ 4 << 3)
+#define MULSD_x_xm     0x59
+#define NEG_rm         (/* GROUP_F7 */ 3 << 3)
+#define NOP            0x90
+#define NOT_rm         (/* GROUP_F7 */ 2 << 3)
+#define OR             (/* BINARY */ 1 << 3)
+#define OR_r_rm                0x0b
+#define OR_EAX_i32     0x0d
+#define OR_rm_r                0x09
+#define OR_rm8_r8      0x08
+#define POP_r          0x58
+#define POP_rm         0x8f
+#define POPF           0x9d
+#define PUSH_i32       0x68
+#define PUSH_r         0x50
+#define PUSH_rm                (/* GROUP_FF */ 6 << 3)
+#define PUSHF          0x9c
+#define RET_near       0xc3
+#define RET_i16                0xc2
+#define SBB            (/* BINARY */ 3 << 3)
+#define SBB_EAX_i32    0x1d
+#define SBB_r_rm       0x1b
+#define SBB_rm_r       0x19
+#define SAR            (/* SHIFT */ 7 << 3)
+#define SHL            (/* SHIFT */ 4 << 3)
+#define SHR            (/* SHIFT */ 5 << 3)
+#define SUB            (/* BINARY */ 5 << 3)
+#define SUB_EAX_i32    0x2d
+#define SUB_r_rm       0x2b
+#define SUB_rm_r       0x29
+#define SUBSD_x_xm     0x5c
+#define TEST_EAX_i32   0xa9
+#define TEST_rm_r      0x85
+#define UCOMISD_x_xm   0x2e
+#define XCHG_EAX_r     0x90
+#define XCHG_r_rm      0x87
+#define XOR            (/* BINARY */ 6 << 3)
+#define XOR_EAX_i32    0x35
+#define XOR_r_rm       0x33
+#define XOR_rm_r       0x31
+#define XORPD_x_xm     0x57
+
+#define GROUP_0F       0x0f
+#define GROUP_F7       0xf7
+#define GROUP_FF       0xff
+#define GROUP_BINARY_81        0x81
+#define GROUP_BINARY_83        0x83
+#define GROUP_SHIFT_1  0xd1
+#define GROUP_SHIFT_N  0xc1
+#define GROUP_SHIFT_CL 0xd3
+
+#define MOD_REG                0xc0
+#define MOD_DISP8      0x40
+
+#define INC_SIZE(s)                    (*inst++ = (s), compiler->size += (s)) /* writes the record's length byte, then adds s to the total code size */
+
+#define PUSH_REG(r)                    (*inst++ = (PUSH_r + (r))) /* one-byte push: opcode 0x50 + register index */
+#define POP_REG(r)                     (*inst++ = (POP_r + (r))) /* one-byte pop: opcode 0x58 + register index */
+#define RET()                          (*inst++ = (RET_near))
+#define RET_I16(n)                     (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) /* ret imm16: n is stored as the low byte, the high byte is always 0 */
+/* r32, r/m32 */
+#define MOV_RM(mod, reg, rm)           (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) /* opcode followed by a hand-built ModRM byte */
+
+/* Multithreading does not affect these static variables, since they store
+   built-in CPU features. Therefore they can be overwritten by different threads
+   if they detect the CPU features at the same time. */
+#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+static sljit_si cpu_has_sse2 = -1;
+#endif
+static sljit_si cpu_has_cmov = -1;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+#include <intrin.h>
+#endif
+
+static void get_cpu_features(void)
+{      /* Executes CPUID with EAX = 1 and caches the SSE2 / CMOV feature bits from EDX in the static flags. */
+       sljit_ui features;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+       int CPUInfo[4];
+       __cpuid(CPUInfo, 1);
+       features = (sljit_ui)CPUInfo[3]; /* the intrinsic stores EAX, EBX, ECX, EDX in that order; [3] is EDX */
+
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
+
+       /* AT&T syntax. */
+       __asm__ (
+               "movl $0x1, %%eax\n"
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               /* On x86-32, there is no red zone, so this
+                  should work (no need for a local variable). */
+               "push %%ebx\n"
+#endif
+               "cpuid\n"
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               "pop %%ebx\n"
+#endif
+               "movl %%edx, %0\n"
+               : "=g" (features)
+               :
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               : "%eax", "%ecx", "%edx" /* ebx is saved and restored by hand above instead of being listed as a clobber */
+#else
+               : "%rax", "%rbx", "%rcx", "%rdx"
+#endif
+       );
+
+#else /* _MSC_VER && _MSC_VER >= 1400 */
+
+       /* Intel syntax. */
+       __asm {
+               mov eax, 1
+               cpuid
+               mov features, edx
+       }
+
+#endif /* _MSC_VER && _MSC_VER >= 1400 */
+
+#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+       cpu_has_sse2 = (features >> 26) & 0x1; /* CPUID.1:EDX bit 26 = SSE2 */
+#endif
+       cpu_has_cmov = (features >> 15) & 0x1; /* CPUID.1:EDX bit 15 = CMOV */
+}
+
+static sljit_ub get_jump_code(sljit_si type)
+{      /* Maps an SLJIT condition type to the second opcode byte of the 0x0f-prefixed near Jcc; returns 0 for unknown types. */
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_FLOAT_EQUAL:
+               return 0x84 /* je */;
+
+       case SLJIT_C_NOT_EQUAL:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               return 0x85 /* jne */;
+
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               return 0x82 /* jc */;
+
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               return 0x83 /* jae */;
+
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               return 0x87 /* jnbe */;
+
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               return 0x86 /* jbe */;
+
+       case SLJIT_C_SIG_LESS:
+               return 0x8c /* jl */;
+
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               return 0x8d /* jnl */;
+
+       case SLJIT_C_SIG_GREATER:
+               return 0x8f /* jnle */;
+
+       case SLJIT_C_SIG_LESS_EQUAL:
+               return 0x8e /* jle */;
+
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_MUL_OVERFLOW:
+               return 0x80 /* jo */;
+
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               return 0x81 /* jno */;
+
+       case SLJIT_C_FLOAT_UNORDERED:
+               return 0x8a /* jp */;
+
+       case SLJIT_C_FLOAT_ORDERED:
+               return 0x8b /* jnp (same opcode as jpo) */;
+       }
+       return 0;
+}
+
+static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
+#endif
+
+static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
+{      /* Emits the opcode of a rel8/rel32 jump or call at code_ptr, reserves room for the displacement and returns the advanced pointer. */
+       sljit_si short_jump;
+       sljit_uw label_addr;
+
+       if (jump->flags & JUMP_LABEL)
+               label_addr = (sljit_uw)(code + jump->u.label->size); /* label position computed from its offset within the output buffer */
+       else
+               label_addr = jump->u.target;
+       short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; /* +2: a short jump is one opcode byte plus one displacement byte */
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
+               return generate_far_jump_code(jump, code_ptr, type); /* distance does not fit in a signed 32-bit displacement */
+#endif
+
+       if (type == SLJIT_JUMP) {
+               if (short_jump)
+                       *code_ptr++ = JMP_i8;
+               else
+                       *code_ptr++ = JMP_i32;
+               jump->addr++; /* addr now refers to the displacement field that is patched later */
+       }
+       else if (type >= SLJIT_FAST_CALL) {
+               short_jump = 0; /* CALL_i32 is always emitted with a 32-bit displacement here */
+               *code_ptr++ = CALL_i32;
+               jump->addr++;
+       }
+       else if (short_jump) {
+               *code_ptr++ = get_jump_code(type) - 0x10; /* short Jcc opcode (0x7x) = near Jcc second byte (0x8x) - 0x10 */
+               jump->addr++;
+       }
+       else {
+               *code_ptr++ = GROUP_0F;
+               *code_ptr++ = get_jump_code(type);
+               jump->addr += 2; /* two opcode bytes precede the displacement */
+       }
+
+       if (short_jump) {
+               jump->flags |= PATCH_MB;
+               code_ptr += sizeof(sljit_sb); /* leave room for the 8-bit displacement */
+       } else {
+               jump->flags |= PATCH_MW;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               code_ptr += sizeof(sljit_sw);
+#else
+               code_ptr += sizeof(sljit_si);
+#endif
+       }
+
+       return code_ptr;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{      /* Copies the buffered instruction records into executable memory, then patches jump displacements; returns the code pointer. */
+       struct sljit_memory_fragment *buf;
+       sljit_ub *code;
+       sljit_ub *code_ptr;
+       sljit_ub *buf_ptr;
+       sljit_ub *buf_end;
+       sljit_ub len;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       /* Second code generation pass. */
+       code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = buf->memory;
+               buf_end = buf_ptr + buf->used_size;
+               do {
+                       len = *buf_ptr++; /* every record starts with a length byte; 0 marks a special record */
+                       if (len > 0) {
+                               /* The code is already generated. */
+                               SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
+                               code_ptr += len;
+                               buf_ptr += len;
+                       }
+                       else {
+                               if (*buf_ptr >= 4) { /* jump record: the jump type is the stored byte minus 4 */
+                                       jump->addr = (sljit_uw)code_ptr;
+                                       if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
+                                               code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
+                                       else
+                                               code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
+                                       jump = jump->next;
+                               }
+                               else if (*buf_ptr == 0) { /* label record */
+                                       label->addr = (sljit_uw)code_ptr;
+                                       label->size = code_ptr - code;
+                                       label = label->next;
+                               }
+                               else if (*buf_ptr == 1) { /* constant record: the address is the machine word just before code_ptr */
+                                       const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
+                                       const_ = const_->next;
+                               }
+                               else { /* remaining type bytes (2 or 3): call/jump to a fixed address stored in the record */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                                       *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
+                                       buf_ptr++;
+                                       *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)); /* absolute target converted to a displacement from the end of the instruction */
+                                       code_ptr += sizeof(sljit_sw);
+                                       buf_ptr += sizeof(sljit_sw) - 1; /* -1 because the shared buf_ptr++ below consumes one more byte */
+#else
+                                       code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
+                                       buf_ptr += sizeof(sljit_sw);
+#endif
+                               }
+                               buf_ptr++;
+                       }
+               } while (buf_ptr < buf_end);
+               SLJIT_ASSERT(buf_ptr == buf_end);
+               buf = buf->next;
+       } while (buf);
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+
+       jump = compiler->jumps; /* patch pass: all label addresses are known now */
+       while (jump) {
+               if (jump->flags & PATCH_MB) { /* NOTE(review): reads u.label without checking JUMP_LABEL, yet generate_near_jump_code can set PATCH_MB for u.target jumps - confirm */
+                       SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
+                       *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
+               } else if (jump->flags & PATCH_MW) {
+                       if (jump->flags & JUMP_LABEL) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                               *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
+#else
+                               SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
+                               *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
+#endif
+                       }
+                       else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                               *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
+#else
+                               SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
+                               *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
+#endif
+                       }
+               }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               else if (jump->flags & PATCH_MD)
+                       *(sljit_sw*)jump->addr = jump->u.label->addr; /* stores the absolute label address as a full machine word */
+#endif
+
+               jump = jump->next;
+       }
+
+       /* Maybe we waste some space because of short jumps. */
+       SLJIT_ASSERT(code_ptr <= code + compiler->size);
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = code_ptr - code;
+       return (void*)code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+static sljit_si emit_mov(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw);
+
+static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
+{      /* Emits "lea sp, [sp + word]; pushf" and marks the flags as saved. */
+       sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+       FAIL_IF(!inst);
+       INC_SIZE(5);
+#else
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); /* one extra byte for the REX.W prefix */
+       FAIL_IF(!inst);
+       INC_SIZE(6);
+       *inst++ = REX_W;
+#endif
+       *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
+       *inst++ = 0x64; /* ModRM: mod=01 (disp8), reg=esp/rsp, rm=100 (SIB follows) */
+       *inst++ = 0x24; /* SIB: base=esp/rsp, no index */
+       *inst++ = (sljit_ub)sizeof(sljit_sw);
+       *inst++ = PUSHF; /* the push moves the stack pointer back down by one word; lea itself leaves the flags untouched */
+       compiler->flags_saved = 1;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
+{      /* Emits "popf; lea sp, [sp - word]" and records keep_flags as the new flags_saved state. */
+       sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+       FAIL_IF(!inst);
+       INC_SIZE(5);
+       *inst++ = POPF;
+#else
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); /* one extra byte for the REX.W prefix */
+       FAIL_IF(!inst);
+       INC_SIZE(6);
+       *inst++ = POPF;
+       *inst++ = REX_W;
+#endif
+       *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
+       *inst++ = 0x64; /* ModRM: mod=01 (disp8), reg=esp/rsp, rm=100 (SIB follows) */
+       *inst++ = 0x24; /* SIB: base=esp/rsp, no index */
+       *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw); /* disp8 = -sizeof(sljit_sw); lea does not alter the flags just restored */
+       compiler->flags_saved = keep_flags;
+       return SLJIT_SUCCESS;
+}
+
+#ifdef _WIN32
+#include <malloc.h>
+
+static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
+{
+       /* Workaround for calling the internal _chkstk() function on Windows.
+       This function touches all 4k pages belonging to the requested stack
+       space, whose size is passed in local_size. This is necessary on Windows
+       where the stack can only grow in 4k steps. The function merely burns
+       CPU cycles if the stack is already large enough, but since that cannot
+       be known in advance, it must always be called. I think this is a bad
+       design in general even if it has some reasons. */
+       *(volatile sljit_si*)alloca(local_size) = 0; /* volatile store so the write to the alloca'd block is not optimized away */
+}
+
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#include "sljitNativeX86_32.c"
+#else
+#include "sljitNativeX86_64.c"
+#endif
+
+static sljit_si emit_mov(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{      /* Emits a word-sized move from (src, srcw) to (dst, dstw), choosing the encoding by operand kind. */
+       sljit_ub* inst;
+
+       if (dst == SLJIT_UNUSED) {
+               /* No destination, doesn't need to setup flags. */
+               if (src & SLJIT_MEM) { /* a memory source is still loaded (into TMP_REG1) */
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
+                       FAIL_IF(!inst);
+                       *inst = MOV_r_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+       if (FAST_IS_REG(src)) { /* register source: one store form covers register and memory destinations */
+               inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_r;
+               return SLJIT_SUCCESS;
+       }
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
+#else
+                       if (!compiler->mode32) {
+                               if (NOT_HALFWORD(srcw)) /* value is outside the signed 32-bit range */
+                                       return emit_load_imm64(compiler, dst, srcw);
+                       }
+                       else
+                               return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
+#endif
+               } /* on x86-64, a 64-bit move of a halfword immediate falls through to MOV_rm_i32 below */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if (!compiler->mode32 && NOT_HALFWORD(srcw)) { /* wide immediate to memory: go through TMP_REG2 */
+                       FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm_r;
+                       return SLJIT_SUCCESS;
+               }
+#endif
+               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_i32;
+               return SLJIT_SUCCESS;
+       }
+       if (FAST_IS_REG(dst)) { /* memory source, register destination */
+               inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst = MOV_r_rm;
+               return SLJIT_SUCCESS;
+       }
+
+       /* Memory to memory move. Requires two instructions. */
+       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
+       FAIL_IF(!inst);
+       *inst = MOV_r_rm;
+       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+       FAIL_IF(!inst);
+       *inst = MOV_rm_r;
+       return SLJIT_SUCCESS;
+}
+
+#define EMIT_MOV(compiler, dst, dstw, src, srcw) /* emit_mov wrapped in FAIL_IF; the expansion already ends with ';' */ \
+       FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{      /* Emits a zero-operand operation: breakpoint, nop, or a multiply/divide through the 0xf7 opcode group. */
+       sljit_ub *inst;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si size;
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_BREAKPOINT:
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+               *inst = INT3;
+               break;
+       case SLJIT_NOP:
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+               *inst = NOP;
+               break;
+       case SLJIT_UMUL:
+       case SLJIT_SMUL:
+       case SLJIT_UDIV:
+       case SLJIT_SDIV:
+               compiler->flags_saved = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifdef _WIN64
+               SLJIT_COMPILE_ASSERT(
+                       reg_map[SLJIT_SCRATCH_REG1] == 0
+                       && reg_map[SLJIT_SCRATCH_REG2] == 2
+                       && reg_map[TMP_REG1] > 7,
+                       invalid_register_assignment_for_div_mul);
+#else
+               SLJIT_COMPILE_ASSERT(
+                       reg_map[SLJIT_SCRATCH_REG1] == 0
+                       && reg_map[SLJIT_SCRATCH_REG2] < 7
+                       && reg_map[TMP_REG1] == 2,
+                       invalid_register_assignment_for_div_mul);
+#endif
+               compiler->mode32 = op & SLJIT_INT_OP;
+#endif
+
+               op = GET_OPCODE(op);
+               if (op == SLJIT_UDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+                       EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0); /* keep a copy of the operand in TMP_REG1 before SCRATCH_REG2 is zeroed */
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
+#else
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
+#endif
+                       FAIL_IF(!inst);
+                       *inst = XOR_r_rm; /* xor reg, reg: zeroes the register before the unsigned divide */
+               }
+
+               if (op == SLJIT_SDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+                       EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       *inst = CDQ;
+#else
+                       if (compiler->mode32) {
+                               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                               FAIL_IF(!inst);
+                               INC_SIZE(1);
+                               *inst = CDQ;
+                       } else {
+                               inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
+                               FAIL_IF(!inst);
+                               INC_SIZE(2);
+                               *inst++ = REX_W;
+                               *inst = CDQ; /* opcode 0x99 with a REX.W prefix is the 64-bit sign extension (cqo) */
+                       }
+#endif
+               }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
+               FAIL_IF(!inst);
+               INC_SIZE(2);
+               *inst++ = GROUP_F7;
+               *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]); /* divides use the TMP_REG1 copy as the operand */
+#else
+#ifdef _WIN64
+               size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; /* a prefix byte is needed for 64-bit width or for the extended TMP_REG1 */
+#else
+               size = (!compiler->mode32) ? 3 : 2;
+#endif
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+               INC_SIZE(size);
+#ifdef _WIN64
+               if (!compiler->mode32)
+                       *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
+               else if (op >= SLJIT_UDIV)
+                       *inst++ = REX_B;
+               *inst++ = GROUP_F7;
+               *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]);
+#else
+               if (!compiler->mode32)
+                       *inst++ = REX_W;
+               *inst++ = GROUP_F7;
+               *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
+#endif
+#endif
+               switch (op) { /* OR the opcode extension (ModRM reg field) into the byte written above */
+               case SLJIT_UMUL:
+                       *inst |= MUL;
+                       break;
+               case SLJIT_SMUL:
+                       *inst |= IMUL;
+                       break;
+               case SLJIT_UDIV:
+                       *inst |= DIV;
+                       break;
+               case SLJIT_SDIV:
+                       *inst |= IDIV;
+                       break;
+               }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
+               EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0); /* TMP_REG1 maps to hardware register 2 here (see the assert above); copy it into SCRATCH_REG2 */
+#endif
+               break;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+#define ENCODE_PREFIX(prefix) /* emits a single prefix byte as its own one-byte record; uses the caller's 'inst' and 'compiler' */ \
+       do { \
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
+               FAIL_IF(!inst); \
+               INC_SIZE(1); \
+               *inst = (prefix); \
+       } while (0)
+
+static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{      /* Emits an 8-bit move from (src, srcw) to (dst, dstw).
+        *
+        * sign: non-zero widens with MOVSX_r_rm8, zero widens with MOVZX_r_rm8
+        *       whenever the byte is brought into a register.
+        * Returns SLJIT_SUCCESS once everything was emitted; FAIL_IF / EMIT_MOV
+        * handle a failed emission (presumably by returning early -- confirm
+        * against their definitions).
+        *
+        * On x86-32 the code treats registers whose reg_map[] value is >= 4 as
+        * unusable for byte access (presumably because they have no 8-bit form
+        * in 32-bit mode) and works around that below.
+        */
+       sljit_ub* inst;
+       sljit_si dst_r; /* register that carries the byte towards dst */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       sljit_si work_r; /* scratch register temporarily swapped with TMP_REG1 for the store */
+#endif
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+#endif
+
+       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
+               return SLJIT_SUCCESS; /* Empty instruction. */
+
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) { /* register target: the immediate is loaded with an ordinary, non-byte move */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
+#else
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm_i32;
+                       return SLJIT_SUCCESS;
+#endif
+               }
+               inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); /* non-register target: byte-immediate store */
+               FAIL_IF(!inst);
+               *inst = MOV_rm8_i8;
+               return SLJIT_SUCCESS;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { /* register -> memory: no widening needed, only the store further down */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (reg_map[src] >= 4) { /* copy into TMP_REG1; the store path below swaps it into a usable register */
+                       SLJIT_ASSERT(dst_r == TMP_REG1);
+                       EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+               } else
+                       dst_r = src;
+#else
+               dst_r = src;
+#endif
+       }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
+               /* src, dst are registers. */
+               SLJIT_ASSERT(SLOW_IS_REG(dst));
+               if (reg_map[dst] < 4) { /* copy first, then widen dst from its own low byte */
+                       if (dst != src)
+                               EMIT_MOV(compiler, dst, 0, src, 0);
+                       inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst++ = GROUP_0F;
+                       *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
+               }
+               else { /* neither register qualifies: widen with shifts (signed) or a mask (unsigned) */
+                       if (dst != src)
+                               EMIT_MOV(compiler, dst, 0, src, 0);
+                       if (sign) {
+                               /* shl reg, 24 */
+                               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
+                               FAIL_IF(!inst);
+                               *inst |= SHL;
+                               /* sar reg, 24 */
+                               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
+                               FAIL_IF(!inst);
+                               *inst |= SAR;
+                       }
+                       else {
+                               inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); /* and reg, 0xff */
+                               FAIL_IF(!inst);
+                               *(inst + 1) |= AND;
+                       }
+               }
+               return SLJIT_SUCCESS;
+       }
+#endif
+       else {
+               /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
+       }
+
+       if (dst & SLJIT_MEM) { /* the byte now sits in dst_r; write it out */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (dst_r == TMP_REG1) {
+                       /* Find a non-used register, whose reg_map[src] < 4. */
+                       if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) { /* the choice below avoids both the base and the index register of dst */
+                               if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2))
+                                       work_r = SLJIT_SCRATCH_REG3;
+                               else
+                                       work_r = SLJIT_SCRATCH_REG2;
+                       }
+                       else {
+                               if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
+                                       work_r = SLJIT_SCRATCH_REG1;
+                               else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2)
+                                       work_r = SLJIT_SCRATCH_REG3;
+                               else
+                                       work_r = SLJIT_SCRATCH_REG2;
+                       }
+
+                       if (work_r == SLJIT_SCRATCH_REG1) { /* swap work_r with TMP_REG1; SLJIT_SCRATCH_REG1 gets the one-byte XCHG_EAX_r form */
+                               ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
+                       }
+                       else {
+                               inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
+                               FAIL_IF(!inst);
+                               *inst = XCHG_r_rm;
+                       }
+
+                       inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); /* store the byte from work_r */
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm8_r8;
+
+                       if (work_r == SLJIT_SCRATCH_REG1) { /* same exchange again, undoing the swap */
+                               ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
+                       }
+                       else {
+                               inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
+                               FAIL_IF(!inst);
+                               *inst = XCHG_r_rm;
+                       }
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm8_r8;
+               }
+#else
+               inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); /* NOTE(review): EX86_REX presumably forces a REX prefix so any register's low byte is encodable -- confirm */
+               FAIL_IF(!inst);
+               *inst = MOV_rm8_r8;
+#endif
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{      /* Emits a 16-bit move from (src, srcw) to (dst, dstw).
+        *
+        * sign: non-zero widens with MOVSX_r_rm16, zero widens with
+        *       MOVZX_r_rm16 whenever the value is brought into a register.
+        * Returns SLJIT_SUCCESS once everything was emitted; FAIL_IF handles a
+        * failed emission (presumably by returning early -- confirm).
+        */
+       sljit_ub* inst;
+       sljit_si dst_r; /* register that carries the value towards dst */
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+#endif
+
+       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
+               return SLJIT_SUCCESS; /* Empty instruction. */
+
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) { /* register target: the immediate is loaded with an ordinary, non-16-bit move */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
+#else
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm_i32;
+                       return SLJIT_SUCCESS;
+#endif
+               }
+               inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); /* non-register target; going by the flag names: 16-bit immediate with a 0x66 prefix -- confirm in emit_x86_instruction */
+               FAIL_IF(!inst);
+               *inst = MOV_rm_i32;
+               return SLJIT_SUCCESS;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
+               dst_r = src; /* register -> memory: store straight from src, no widening */
+       else {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); /* load and widen into dst_r */
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
+       }
+
+       if (dst & SLJIT_MEM) { /* write dst_r out with the EX86_PREF_66 variant of MOV_rm_r */
+               inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_r;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a GROUP_F7 single-operand instruction, selected by `opcode`, that
+ * reads (src, srcw) and leaves its result in (dst, dstw).
+ *
+ * - dst == SLJIT_UNUSED: the value is processed in TMP_REG1 and not stored.
+ * - dst identical to src: the instruction is applied in place.
+ * - register dst: src is copied into dst, which is then processed.
+ * - anything else: the value is processed in TMP_REG1 and copied to dst.
+ *
+ * Returns SLJIT_SUCCESS once everything was emitted.
+ */
+static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si target = TMP_REG1;     /* operand the instruction works on */
+       sljit_sw targetw = 0;
+       sljit_si load_src = 1;          /* copy src into the target first */
+       sljit_si store_tmp = 0;         /* copy TMP_REG1 out to dst afterwards */
+
+       /* The SLJIT_UNUSED test has to come first, exactly as before. */
+       if (dst != SLJIT_UNUSED) {
+               if (dst == src && dstw == srcw) {
+                       /* Same input and output */
+                       target = dst;
+                       targetw = dstw;
+                       load_src = 0;
+               }
+               else if (FAST_IS_REG(dst)) {
+                       target = dst;
+                       targetw = dstw;
+               }
+               else {
+                       store_tmp = 1;
+               }
+       }
+
+       if (load_src) {
+               EMIT_MOV(compiler, target, 0, src, srcw);
+       }
+
+       inst = emit_x86_instruction(compiler, 1, 0, 0, target, targetw);
+       FAIL_IF(!inst);
+       inst[0] = GROUP_F7;
+       inst[1] |= opcode;
+
+       if (store_tmp) {
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a NOT of (src, srcw) followed by an OR of the result register with
+ * itself (going by the function name, the OR is there to produce status
+ * flags for the result).
+ *
+ * A register dst is used directly as the work register; for
+ * dst == SLJIT_UNUSED or a non-register dst the work is done in TMP_REG1,
+ * and only in the latter case is TMP_REG1 copied to (dst, dstw) afterwards.
+ *
+ * Returns SLJIT_SUCCESS once everything was emitted.
+ */
+static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si work_r = TMP_REG1;     /* register that is inverted */
+       sljit_sw work_w = 0;
+       sljit_si store_tmp = 0;         /* copy TMP_REG1 out to dst afterwards */
+
+       /* The SLJIT_UNUSED test has to come first, exactly as before. */
+       if (dst != SLJIT_UNUSED) {
+               if (FAST_IS_REG(dst)) {
+                       work_r = dst;
+                       work_w = dstw;
+               }
+               else {
+                       store_tmp = 1;
+               }
+       }
+
+       EMIT_MOV(compiler, work_r, 0, src, srcw);
+
+       /* not work_r */
+       inst = emit_x86_instruction(compiler, 1, 0, 0, work_r, work_w);
+       FAIL_IF(!inst);
+       inst[0] = GROUP_F7;
+       inst[1] |= NOT_rm;
+
+       /* or work_r, work_r */
+       inst = emit_x86_instruction(compiler, 1, work_r, 0, work_r, 0);
+       FAIL_IF(!inst);
+       *inst = OR_r_rm;
+
+       if (store_tmp) {
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       }
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{      /* Emits a count-leading-zeros of (src, srcw) into (dst, dstw).
+        *
+        * Scheme: BSR puts the index of the highest set bit into TMP_REG1, the
+        * result register is preloaded with 32 + 31 (64 + 63 for full-width
+        * x86-64 operations), the BSR result replaces that preload only when
+        * the "not equal" condition holds (i.e. ZF clear; per the x86 reference
+        * BSR sets ZF for a zero source), and a final XOR with 31 (63) turns an
+        * index i into 31 - i (63 - i) and the preload into 32 (64).
+        *
+        * op_flags is only read on x86-64, where SLJIT_INT_OP selects the
+        * 32-bit variant. Returns SLJIT_SUCCESS once everything was emitted.
+        */
+       sljit_ub* inst;
+       sljit_si dst_r; /* register in which the result is built */
+
+       SLJIT_UNUSED_ARG(op_flags);
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               /* Just set the zero flag. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_F7;
+               *inst |= NOT_rm;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
+#else
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
+#endif
+               FAIL_IF(!inst);
+               *inst |= SHR; /* only the inverted top bit survives: the value is zero exactly when the source's top bit was set */
+               return SLJIT_SUCCESS;
+       }
+
+       if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { /* materialise an immediate source in TMP_REG1 first */
+               EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw); /* bsr TMP_REG1, src */
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst = BSR_r_rm;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       if (FAST_IS_REG(dst))
+               dst_r = dst;
+       else {
+               /* Find an unused temporary register. */
+               if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
+                       dst_r = SLJIT_SCRATCH_REG1;
+               else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2))
+                       dst_r = SLJIT_SCRATCH_REG2;
+               else
+                       dst_r = SLJIT_SCRATCH_REG3;
+               EMIT_MOV(compiler, dst, dstw, dst_r, 0); /* park the borrowed register's current value in dst; the XCHG at the end brings it back */
+       }
+       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); /* (32 + 31) ^ 31 == 32 */
+#else
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+       compiler->mode32 = 0; /* the preload is emitted with mode32 cleared, then mode32 is set again from op_flags */
+       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
+       compiler->mode32 = op_flags & SLJIT_INT_OP;
+#endif
+
+       if (cpu_has_cmov == -1) /* -1 is treated as "not determined yet" and triggers get_cpu_features() */
+               get_cpu_features();
+
+       if (cpu_has_cmov) {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); /* cmovne dst_r, TMP_REG1 */
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = CMOVNE_r_rm;
+       } else { /* no CMOV: a short JE jumps over a hand-encoded "mov dst_r, TMP_REG1" */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+               FAIL_IF(!inst);
+               INC_SIZE(4);
+
+               *inst++ = JE_i8;
+               *inst++ = 2; /* jump distance: the two bytes of the mov written next */
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
+#else
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+               FAIL_IF(!inst);
+               INC_SIZE(5);
+
+               *inst++ = JE_i8;
+               *inst++ = 3; /* jump distance: REX byte, opcode and ModRM byte written next */
+               *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
+#endif
+       }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
+#else
+       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
+#endif
+       FAIL_IF(!inst);
+       *(inst + 1) |= XOR; /* xor dst_r, 31 (or 63): converts the bit index into the leading-zero count */
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       if (dst & SLJIT_MEM) { /* one exchange stores the result and reloads the value parked in dst earlier */
+               inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = XCHG_r_rm;
+       }
+#else
+       if (dst & SLJIT_MEM)
+               EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); /* dst_r is TMP_REG2 for a memory dst (see the assignment above) */
+#endif
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{      /* Emits a single-source operation: one of the moves in the range
+        * SLJIT_MOV .. SLJIT_MOVU_P, or SLJIT_NOT / SLJIT_NEG / SLJIT_CLZ.
+        *
+        * op:         opcode combined with flag bits; the flag bits are kept in
+        *             op_flags and op is reduced with GET_OPCODE().
+        * dst, dstw:  destination operand and its offset/immediate word.
+        * src, srcw:  source operand and its offset/immediate word.
+        *
+        * Returns SLJIT_SUCCESS or the result of the delegated emitter; an
+        * opcode outside the handled set falls through to SLJIT_SUCCESS
+        * without emitting anything in this function.
+        */
+       sljit_ub* inst;
+       sljit_si update = 0; /* set for the SLJIT_MOVU* forms, which also write the effective address into the base register */
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       sljit_si dst_is_ereg = 0; /* set through CHECK_EXTRA_REGS below (presumably when the operand is an "extra" register kept in the locals area -- confirm) */
+       sljit_si src_is_ereg = 0;
+#else
+#      define src_is_ereg 0
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
+       CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = op_flags & SLJIT_INT_OP;
+#endif
+
+       op = GET_OPCODE(op);
+       if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { /* the whole move family is handled and returned from inside this block */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               compiler->mode32 = 0;
+#endif
+
+               if (op_flags & SLJIT_INT_OP) {
+                       if (FAST_IS_REG(src) && src == dst) { /* register moved onto itself: nothing to emit unless a type cast is needed */
+                               if (!TYPE_CAST_NEEDED(op))
+                                       return SLJIT_SUCCESS;
+                       }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if (op == SLJIT_MOV_SI && (src & SLJIT_MEM)) /* with SLJIT_INT_OP, memory sources use the unsigned-int variant and immediates the signed-int variant */
+                               op = SLJIT_MOV_UI;
+                       if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
+                               op = SLJIT_MOVU_UI;
+                       if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOV_SI;
+                       if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOVU_SI;
+#endif
+               }
+
+               SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
+               if (op >= SLJIT_MOVU) { /* map SLJIT_MOVU* onto the matching SLJIT_MOV* and remember the address update */
+                       update = 1;
+                       op -= 8;
+               }
+
+               if (src & SLJIT_IMM) {
+                       switch (op) { /* normalise the immediate to the operand type by casting */
+                       case SLJIT_MOV_UB:
+                               srcw = (sljit_ub)srcw;
+                               break;
+                       case SLJIT_MOV_SB:
+                               srcw = (sljit_sb)srcw;
+                               break;
+                       case SLJIT_MOV_UH:
+                               srcw = (sljit_uh)srcw;
+                               break;
+                       case SLJIT_MOV_SH:
+                               srcw = (sljit_sh)srcw;
+                               break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       case SLJIT_MOV_UI:
+                               srcw = (sljit_ui)srcw;
+                               break;
+                       case SLJIT_MOV_SI:
+                               srcw = (sljit_si)srcw;
+                               break;
+#endif
+                       }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       if (SLJIT_UNLIKELY(dst_is_ereg)) /* the already-normalised immediate can be stored with a plain move */
+                               return emit_mov(compiler, dst, dstw, src, srcw);
+#endif
+               }
+
+               if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) { /* source update: LEA the effective address into the base register, then read through the bare base */
+                       inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+                       src &= SLJIT_MEM | 0xf; /* drops everything but SLJIT_MEM and the low four bits (presumably the base register field -- confirm against REG_MASK) */
+                       srcw = 0;
+               }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { /* narrow moves and memory sources go through TMP_REG1; the store happens after the switch */
+                       SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
+                       dst = TMP_REG1;
+               }
+#endif
+
+               switch (op) {
+               case SLJIT_MOV:
+               case SLJIT_MOV_P:
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               case SLJIT_MOV_UI:
+               case SLJIT_MOV_SI:
+#endif
+                       FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_UB:
+                       FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_SB:
+                       FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_UH:
+                       FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_SH:
+                       FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
+                       break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               case SLJIT_MOV_UI:
+                       FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_SI:
+                       FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
+                       break;
+#endif
+               }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) /* complete the detour started above: TMP_REG1 -> the slot addressed by dstw */
+                       return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0);
+#endif
+
+               if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) { /* destination update: performed after the store, LEA into the base register */
+                       inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (SLJIT_UNLIKELY(GET_FLAGS(op_flags))) /* an operation that sets flags clears the flags_saved marker */
+               compiler->flags_saved = 0;
+
+       switch (op) {
+       case SLJIT_NOT:
+               if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E)) /* SLJIT_SET_E requested: use the NOT variant that is followed by an OR */
+                       return emit_not_with_flags(compiler, dst, dstw, src, srcw);
+               return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
+
+       case SLJIT_NEG:
+               if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) /* SLJIT_KEEP_FLAGS: save the flags first unless already saved */
+                       FAIL_IF(emit_save_flags(compiler));
+               return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
+
+       case SLJIT_CLZ:
+               if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+               return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#      undef src_is_ereg
+#endif
+}
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
+       if (IS_HALFWORD(immw) || compiler->mode32) { \
+               inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
+               FAIL_IF(!inst); \
+               *(inst + 1) |= (op_imm); \
+       } \
+       else { \
+               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
+               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
+               FAIL_IF(!inst); \
+               *inst = (op_mr); \
+       }
+
+#define BINARY_EAX_IMM(op_eax_imm, immw) \
+       FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
+
+#else
+
+#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
+       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
+       FAIL_IF(!inst); \
+       *(inst + 1) |= (op_imm);
+
+#define BINARY_EAX_IMM(op_eax_imm, immw) \
+       FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
+
+#endif
+
+static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* Emits dst = src1 <op> src2 for an operation whose operands may be
+        * swapped (see the "Only for cumulative operations" case below).
+        *
+        * op_rm:      opcode used when a register is combined with a second
+        *             operand given as (operand, word).
+        * op_mr:      opcode used when a register is applied to dst/memory.
+        * op_imm:     value OR-ed into the immediate form by BINARY_IMM.
+        * op_eax_imm: opcode handed to BINARY_EAX_IMM when dst is
+        *             SLJIT_SCRATCH_REG1.
+        *
+        * With dst == SLJIT_UNUSED the operation is still performed, on
+        * TMP_REG1, and the result is not stored. Returns SLJIT_SUCCESS once
+        * everything was emitted.
+        */
+       sljit_ub* inst; /* also assigned inside BINARY_IMM, which refers to it by name */
+
+       if (dst == SLJIT_UNUSED) {
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (dst == src1 && dstw == src1w) { /* dst is the first operand: apply src2 to it in place */
+               if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+                       if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
+#endif
+                               BINARY_EAX_IMM(op_eax_imm, src2w); /* only taken for immediates outside -128..127 (presumably because the generic form is shorter for small ones) */
+                       }
+                       else {
+                               BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
+                       }
+               }
+               else if (FAST_IS_REG(dst)) {
+                       inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               else if (FAST_IS_REG(src2)) {
+                       /* Special exception for sljit_emit_op_flags. */
+                       inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               else { /* neither operand is a register: bring src2 into TMP_REG1 first */
+                       EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* Only for cumulative operations. */
+       if (dst == src2 && dstw == src2w) { /* mirror image of the case above with src1 and src2 exchanged */
+               if (src1 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+#else
+                       if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
+#endif
+                               BINARY_EAX_IMM(op_eax_imm, src1w);
+                       }
+                       else {
+                               BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
+                       }
+               }
+               else if (FAST_IS_REG(dst)) {
+                       inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               else if (FAST_IS_REG(src1)) {
+                       inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* General version. */
+       if (FAST_IS_REG(dst)) { /* dst differs from both sources here, so it can be loaded with src1 directly */
+               EMIT_MOV(compiler, dst, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+       }
+       else {
+               /* This version requires less memory writing. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* Emits dst = src1 <op> src2 for an operation whose operand order must
+        * be kept: unlike emit_cum_binary there is no "dst equals src2" shortcut,
+        * and a register dst that is also src2 is computed through TMP_REG1.
+        *
+        * op_rm:      opcode used when a register is combined with a second
+        *             operand given as (operand, word).
+        * op_mr:      opcode used when a register is applied to dst/memory.
+        * op_imm:     value OR-ed into the immediate form by BINARY_IMM.
+        * op_eax_imm: opcode handed to BINARY_EAX_IMM when dst is
+        *             SLJIT_SCRATCH_REG1.
+        *
+        * With dst == SLJIT_UNUSED the operation is still performed, on
+        * TMP_REG1, and the result is not stored. Returns SLJIT_SUCCESS once
+        * everything was emitted.
+        */
+       sljit_ub* inst; /* also assigned inside BINARY_IMM, which refers to it by name */
+
+       if (dst == SLJIT_UNUSED) {
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (dst == src1 && dstw == src1w) { /* dst is the first operand: apply src2 to it in place */
+               if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+                       if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
+#endif
+                               BINARY_EAX_IMM(op_eax_imm, src2w); /* only taken for immediates outside -128..127 (presumably because the generic form is shorter for small ones) */
+                       }
+                       else {
+                               BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
+                       }
+               }
+               else if (FAST_IS_REG(dst)) {
+                       inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               else if (FAST_IS_REG(src2)) {
+                       inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               else { /* neither operand is a register: bring src2 into TMP_REG1 first */
+                       EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* General version. */
+       if (FAST_IS_REG(dst) && dst != src2) { /* loading src1 into dst is only done when that cannot overwrite a register src2 */
+               EMIT_MOV(compiler, dst, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+       }
+       else {
+               /* This version requires less memory writing. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_mul(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+       sljit_si dst_r;
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       /* Register destination. */
+       if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = IMUL_r_rm;
+       }
+       else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = IMUL_r_rm;
+       }
+       else if (src1 & SLJIT_IMM) {
+               if (src2 & SLJIT_IMM) {
+                       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
+                       src2 = dst_r;
+                       src2w = 0;
+               }
+
+               if (src1w <= 127 && src1w >= -128) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i8;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       *inst = (sljit_sb)src1w;
+               }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_sw*)inst = src1w;
+               }
+#else
+               else if (IS_HALFWORD(src1w)) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_si*)inst = (sljit_si)src1w;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
+                       if (dst_r != src2)
+                               EMIT_MOV(compiler, dst_r, 0, src2, src2w);
+                       inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+                       FAIL_IF(!inst);
+                       *inst++ = GROUP_0F;
+                       *inst = IMUL_r_rm;
+               }
+#endif
+       }
+       else if (src2 & SLJIT_IMM) {
+               /* Note: src1 is NOT immediate. */
+
+               if (src2w <= 127 && src2w >= -128) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i8;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       *inst = (sljit_sb)src2w;
+               }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_sw*)inst = src2w;
+               }
+#else
+               else if (IS_HALFWORD(src2w)) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_si*)inst = (sljit_si)src2w;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
+                       if (dst_r != src1)
+                               EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+                       FAIL_IF(!inst);
+                       *inst++ = GROUP_0F;
+                       *inst = IMUL_r_rm;
+               }
+#endif
+       }
+       else {
+               /* Neither argument is immediate. */
+               if (ADDRESSING_DEPENDS_ON(src2, dst_r))
+                       dst_r = TMP_REG1;
+               EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = IMUL_r_rm;
+       }
+
+       if (dst_r == TMP_REG1)
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* Tries to emit dst = src1 + src2 as a single LEA instruction.
+        * Handled shapes: register + register, and register + immediate
+        * (on x86-64 the immediate must satisfy IS_HALFWORD unless mode32 is set).
+        * Returns SLJIT_ERR_UNSUPPORTED when no LEA form was emitted, so that
+        * the caller can fall back to another encoding.
+        */
+       sljit_ub* inst;
+       sljit_si dst_r, done = 0;
+
+       /* When dst is also one of the sources, leave the operation to the
+          normal path, unless the caller asked for the flags to be kept. */
+       if (!keep_flags) {
+               if (dst == src1 && dstw == src1w)
+                       return SLJIT_ERR_UNSUPPORTED;
+               if (dst == src2 && dstw == src2w)
+                       return SLJIT_ERR_UNSUPPORTED;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; /* TMP_REG1 stands in for a non-register dst */
+
+       if (FAST_IS_REG(src1)) {
+               if (FAST_IS_REG(src2)) { /* register + register: address [src1 + src2] */
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+                       done = 1;
+               }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
+#else
+               if (src2 & SLJIT_IMM) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
+#endif
+                       FAIL_IF(!inst); /* shared tail of both #if variants: register + immediate, address [src1 + src2w] */
+                       *inst = LEA_r_m;
+                       done = 1;
+               }
+       }
+       else if (FAST_IS_REG(src2)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
+#else
+               if (src1 & SLJIT_IMM) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
+#endif
+                       FAIL_IF(!inst); /* shared tail of both #if variants: immediate + register, address [src2 + src1w] */
+                       *inst = LEA_r_m;
+                       done = 1;
+               }
+       }
+
+       if (done) {
+               if (dst_r == TMP_REG1) /* result sits in the temporary: move it to the real destination */
+                       return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+               return SLJIT_SUCCESS;
+       }
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* Emits a CMP of src1 against src2. There is no destination operand:
+        * the function only emits the comparison instruction (plus moves into
+        * TMP_REG1 where an operand has to be materialized first).
+        */
+       sljit_ub* inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+       if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+#endif
+               BINARY_EAX_IMM(CMP_EAX_i32, src2w); /* dedicated form for SLJIT_SCRATCH_REG1 with an immediate outside the signed byte range */
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src1)) {
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = CMP_r_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { /* src1 is neither a register nor an immediate here; src2 is a register */
+               inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
+               FAIL_IF(!inst);
+               *inst = CMP_rm_r;
+               return SLJIT_SUCCESS;
+       }
+
+       if (src2 & SLJIT_IMM) {
+               if (src1 & SLJIT_IMM) { /* both operands are immediates: load the first into TMP_REG1 */
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       src1 = TMP_REG1;
+                       src1w = 0;
+               }
+               BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
+       }
+       else { /* remaining combinations: bring src1 into TMP_REG1 and compare it with src2 */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst = CMP_r_rm;
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a TEST of src1 against src2. There is no destination operand.
+ *
+ * TEST is symmetric in its operands, so each special case exists twice,
+ * once per operand order:
+ *  - SLJIT_SCRATCH_REG1 with an immediate outside the signed byte range
+ *    uses the dedicated TEST_EAX_i32 form;
+ *  - a register with an immediate uses the GROUP_F7 form, except on x86-64
+ *    where an immediate rejected by IS_HALFWORD() (and not in mode32) is
+ *    first loaded into TMP_REG2 with emit_load_imm64();
+ *  - a register with any other operand uses TEST_rm_r.
+ * If neither operand is a register, src1 is copied to TMP_REG1 first.
+ *
+ * Returns SLJIT_SUCCESS; emission failures leave early through FAIL_IF /
+ * EMIT_MOV.
+ */
+static sljit_si emit_test_binary(struct sljit_compiler *compiler,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+       if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+#endif
+               BINARY_EAX_IMM(TEST_EAX_i32, src2w);
+               return SLJIT_SUCCESS;
+       }
+
+       /* Mirror of the case above: here the immediate is src1, so it is src1
+          that must carry SLJIT_IMM (src2 is the register). */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+#else
+       if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
+#endif
+               BINARY_EAX_IMM(TEST_EAX_i32, src1w);
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src1)) {
+               if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if (IS_HALFWORD(src2w) || compiler->mode32) {
+                               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
+                               FAIL_IF(!inst);
+                               *inst = GROUP_F7;
+                       }
+                       else {
+                               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
+                               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
+                               FAIL_IF(!inst);
+                               *inst = TEST_rm_r;
+                       }
+#else
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
+                       FAIL_IF(!inst);
+                       *inst = GROUP_F7;
+#endif
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = TEST_rm_r;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src2)) {
+               if (src1 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if (IS_HALFWORD(src1w) || compiler->mode32) {
+                               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
+                               FAIL_IF(!inst);
+                               *inst = GROUP_F7;
+                       }
+                       else {
+                               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
+                               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
+                               FAIL_IF(!inst);
+                               *inst = TEST_rm_r;
+                       }
+#else
+                       inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
+                       FAIL_IF(!inst);
+                       *inst = GROUP_F7;
+#endif
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = TEST_rm_r;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* Neither operand is a register: bring src1 into TMP_REG1. */
+       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+       if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if (IS_HALFWORD(src2w) || compiler->mode32) {
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst = GROUP_F7;
+               }
+               else {
+                       FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst = TEST_rm_r;
+               }
+#else
+               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst = GROUP_F7;
+#endif
+       }
+       else {
+               inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst = TEST_rm_r;
+       }
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_shift(struct sljit_compiler *compiler,
+       sljit_ub mode,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* Emits dst = src1 shifted by src2; 'mode' is OR-ed into the opcode
+        * byte returned by emit_x86_instruction() to select the shift kind.
+        * The count must be an immediate or live in SLJIT_PREF_SHIFT_REG, so
+        * the first half handles counts that already satisfy this, and the
+        * second half moves the count into SLJIT_PREF_SHIFT_REG, preserving
+        * that register's old value unless it is the destination.
+        */
+       sljit_ub* inst;
+
+       if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
+               if (dst == src1 && dstw == src1w) { /* shift in place */
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       return SLJIT_SUCCESS;
+               }
+               if (dst == SLJIT_UNUSED) { /* no destination: shift a copy in TMP_REG1 and do not store it */
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       return SLJIT_SUCCESS;
+               }
+               if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { /* dst holds the count: build the result in TMP_REG1 first */
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+                       return SLJIT_SUCCESS;
+               }
+               if (FAST_IS_REG(dst)) { /* register destination: copy src1 into it, then shift it */
+                       EMIT_MOV(compiler, dst, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       return SLJIT_SUCCESS;
+               }
+
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); /* non-register destination: go through TMP_REG1 */
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+               return SLJIT_SUCCESS;
+       }
+
+       if (dst == SLJIT_PREF_SHIFT_REG) { /* the shift register is overwritten by the result anyway, so it is not saved */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+       }
+       else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { /* dst can be written before src2 is read */
+               if (src1 != dst)
+                       EMIT_MOV(compiler, dst, 0, src1, src1w);
+               EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); /* save the shift register */
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); /* restore the shift register */
+       }
+       else {
+               /* This case is really difficult, since ecx itself may be used for
+                  addressing, and we must make sure it works even in that case. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
+#else
+               /* [esp+0] contains the flags. */
+               EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
+#endif
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
+#else
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
+#endif
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); /* store the result only after the shift register has been restored */
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a shift (see emit_shift) and, when set_flags is non-zero, makes
+ * sure the flags are produced even when the effective shift count is 0,
+ * a case in which the CPU leaves the flags untouched.
+ *
+ * Immediate count: the count is reduced to the operand width (5 bits for
+ * 32-bit operations, 6 bits for 64-bit ones). A zero effective count is
+ * replaced by a plain move, or by "OR dst, src1, 0" when flags are wanted.
+ * Non-immediate count with set_flags: a compare against 0 is emitted
+ * before the shift (src1, when dst is not a register) or after it
+ * (dst, when dst is a register).
+ */
+static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
+       sljit_ub mode, sljit_si set_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* The CPU does not set flags if the shift count is 0. */
+       if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               /* Only the bits that form the effective count matter: 5 bits in
+                  32-bit mode, 6 bits otherwise. Testing 0x3f unconditionally
+                  would treat a 32-bit shift by 32 as a non-zero shift. */
+               if ((src2w & (compiler->mode32 ? 0x1f : 0x3f)) != 0)
+                       return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
+#else
+               if ((src2w & 0x1f) != 0)
+                       return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
+#endif
+               if (!set_flags)
+                       return emit_mov(compiler, dst, dstw, src1, src1w);
+               /* OR dst, src, 0 */
+               return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
+                       dst, dstw, src1, src1w, SLJIT_IMM, 0);
+       }
+
+       if (!set_flags)
+               return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
+
+       /* Run-time count: it may turn out to be 0, so emit an explicit
+          compare against 0 alongside the shift. */
+       if (!FAST_IS_REG(dst))
+               FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
+
+       FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
+
+       if (FAST_IS_REG(dst))
+               return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* Emits a two-operand integer operation, dst = src1 <op> src2.
+        * After argument checking and operand adjustment it updates the
+        * saved-flags bookkeeping (compiler->flags_saved) according to the
+        * flag bits of 'op', then dispatches on GET_OPCODE(op) to the
+        * matching emitter. Returns that emitter's result; an opcode not
+        * listed in the switch yields SLJIT_SUCCESS without emitting anything.
+        */
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+       CHECK_EXTRA_REGS(src1, src1w, (void)0);
+       CHECK_EXTRA_REGS(src2, src2w, (void)0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = op & SLJIT_INT_OP; /* non-zero when op carries SLJIT_INT_OP */
+#endif
+
+       if (GET_OPCODE(op) >= SLJIT_MUL) { /* common flag bookkeeping; ADD/ADDC/SUB/SUBC do their own in the switch below */
+               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
+                       compiler->flags_saved = 0;
+               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+               if (!GET_FLAGS(op)) { /* no flags requested: try the LEA form first */
+                       if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
+                               return compiler->error;
+               }
+               else
+                       compiler->flags_saved = 0;
+               if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+               return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_ADDC:
+               if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
+                       FAIL_IF(emit_restore_flags(compiler, 1));
+               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
+                       FAIL_IF(emit_save_flags(compiler));
+               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
+                       compiler->flags_saved = 0;
+               return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_SUB:
+               if (!GET_FLAGS(op)) { /* no flags requested: an immediate src2 is tried as LEA with the negated value */
+                       if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
+                               return compiler->error;
+               }
+               else
+                       compiler->flags_saved = 0;
+               if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+               if (dst == SLJIT_UNUSED) /* no destination: a compare is emitted instead of a subtraction */
+                       return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
+               return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_SUBC:
+               if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
+                       FAIL_IF(emit_restore_flags(compiler, 1));
+               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
+                       FAIL_IF(emit_save_flags(compiler));
+               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
+                       compiler->flags_saved = 0;
+               return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_MUL:
+               return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_AND:
+               if (dst == SLJIT_UNUSED) /* no destination: a TEST is emitted instead of an AND */
+                       return emit_test_binary(compiler, src1, src1w, src2, src2w);
+               return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_OR:
+               return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_XOR:
+               return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_SHL:
+               return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_LSHR:
+               return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_ASHR:
+               return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
+                       dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+/* Translates an sljit register number into the value stored for it in
+ * reg_map. On x86-32 the four extra registers (SLJIT_TEMPORARY_EREG1/2 and
+ * SLJIT_SAVED_EREG1/2) have no entry to report and yield -1 instead. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       check_sljit_get_register_index(reg);
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       /* Anything that is not one of the extra registers maps directly. */
+       if (reg != SLJIT_TEMPORARY_EREG1 && reg != SLJIT_TEMPORARY_EREG2
+                       && reg != SLJIT_SAVED_EREG1 && reg != SLJIT_SAVED_EREG2)
+               return reg_map[reg];
+       return -1;
+#else
+       return reg_map[reg];
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{      /* Returns the index of a floating point register; on this target
+        * the sljit register number is returned unchanged. */
+       check_sljit_get_float_register_index(reg);
+       return reg;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{      /* Copies 'size' caller-supplied bytes from 'instruction' verbatim into
+        * the instruction buffer. The assertion below requires 0 < size < 16.
+        * Returns SLJIT_SUCCESS; leaves early through FAIL_IF when ensure_buf()
+        * returns NULL.
+        */
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       check_sljit_emit_op_custom(compiler, instruction, size);
+       SLJIT_ASSERT(size > 0 && size < 16);
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + size); /* one byte more than the payload; NOTE(review): presumably for the length byte written by INC_SIZE, confirm against the macro */
+       FAIL_IF(!inst);
+       INC_SIZE(size);
+       SLJIT_MEMMOVE(inst, instruction, size);
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+
+/* Backing storage for the SSE2 mask constants: 3 words of alignment slack
+   followed by room for the 16-byte aligned constants. */
+static sljit_si sse2_data[3 + (4 + 4) * 2];
+/* 16-byte aligned view into sse2_data, set up by init_compiler(). */
+static sljit_si *sse2_buffer;
+
+/* Aligns sse2_buffer and fills in the masks used by the floating point
+   negate / absolute value emitters: word offsets 0 and 4 hold the single
+   precision masks, word offsets 8 and 12 the double precision ones. */
+static void init_compiler(void)
+{
+       sljit_si *single_masks;
+       sljit_si *double_masks;
+
+       sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
+       single_masks = sse2_buffer;
+       double_masks = sse2_buffer + 8;
+
+       /* Single precision: sign bit only, then everything but the sign bit. */
+       single_masks[0] = 0x80000000;
+       single_masks[4] = 0x7fffffff;
+
+       /* Double precision: the same two masks, stored as low word / high word. */
+       double_masks[0] = 0;
+       double_masks[1] = 0x80000000;
+       double_masks[4] = 0xffffffff;
+       double_masks[5] = 0x7fffffff;
+}
+
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{      /* Reports whether floating point code can be emitted.
+        * Resolution order: the SLJIT_IS_FPU_AVAILABLE override when it is
+        * defined; otherwise, with SLJIT_SSE2 enabled, either the value of
+        * cpu_has_sse2 (SLJIT_DETECT_SSE2 builds) or unconditionally 1;
+        * without SLJIT_SSE2, 0.
+        */
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#elif (defined SLJIT_SSE2 && SLJIT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+       if (cpu_has_sse2 == -1) /* presumably -1 means "not detected yet"; get_cpu_features() is expected to set it */
+               get_cpu_features();
+       return cpu_has_sse2;
+#else /* SLJIT_DETECT_SSE2 */
+       return 1;
+#endif /* SLJIT_DETECT_SSE2 */
+#else /* SLJIT_SSE2 */
+       return 0;
+#endif
+}
+
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+
+/* Emits a two-byte (GROUP_0F + opcode) SSE2 instruction operating on
+   register xmm1 and operand xmm2/xmm2w. A non-zero 'single' selects the
+   F3 prefix, zero selects the F2 prefix. */
+static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
+       sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
+{
+       sljit_ub *code;
+       sljit_si flags = 2 | EX86_SSE2;
+
+       if (single)
+               flags |= EX86_PREF_F3;
+       else
+               flags |= EX86_PREF_F2;
+
+       code = emit_x86_instruction(compiler, flags, xmm1, 0, xmm2, xmm2w);
+       FAIL_IF(!code);
+       code[0] = GROUP_0F;
+       code[1] = opcode;
+       return SLJIT_SUCCESS;
+}
+
+/* Emits a two-byte (GROUP_0F + opcode) SSE2 instruction operating on
+   register xmm1 and operand xmm2/xmm2w. The 66 prefix is added only when
+   'pref66' is non-zero; no F2/F3 prefix is used. */
+static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
+       sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
+{
+       sljit_ub *code;
+       sljit_si flags = 2 | EX86_SSE2;
+
+       if (pref66)
+               flags |= EX86_PREF_66;
+
+       code = emit_x86_instruction(compiler, flags, xmm1, 0, xmm2, xmm2w);
+       FAIL_IF(!code);
+       code[0] = GROUP_0F;
+       code[1] = opcode;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
+       sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
+{      /* Loads operand src/srcw into register dst using the MOVSD_x_xm opcode;
+        * 'single' is forwarded to emit_sse2(), where it selects the prefix. */
+       return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
+}
+
+static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
+       sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
+{      /* Stores register src to operand dst/dstw using the MOVSD_xm_x opcode.
+        * Note the swapped argument order in the call: the register is passed
+        * as the first operand of emit_sse2(), the destination as the second. */
+       return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{      /* SSE2 implementation of the single-operand floating point operations:
+        * compare (SLJIT_CMPD), move (SLJIT_MOVD), negate (SLJIT_NEGD) and
+        * absolute value (SLJIT_ABSD). The SLJIT_SINGLE_OP bit of 'op' selects
+        * single precision. Values that are not in a register are staged
+        * through TMP_FREG.
+        */
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 1;
+#endif
+
+       if (GET_OPCODE(op) == SLJIT_CMPD) { /* compare dst with src; for this opcode dst is only read */
+               compiler->flags_saved = 0;
+               if (FAST_IS_REG(dst))
+                       dst_r = dst;
+               else {
+                       dst_r = TMP_FREG;
+                       FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
+               }
+               return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw); /* 66 prefix only for double precision */
+       }
+
+       if (op == SLJIT_MOVD) { /* NOTE(review): compares the whole 'op', so a move carrying SLJIT_SINGLE_OP skips this block and takes the generic load/store path below; confirm that is intended */
+               if (FAST_IS_REG(dst))
+                       return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
+               if (FAST_IS_REG(src))
+                       return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
+               FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
+               return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+       }
+
+       if (SLOW_IS_REG(dst)) { /* operate directly in dst; load src only if it is a different operand */
+               dst_r = dst;
+               if (dst != src)
+                       FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
+       }
+       else {
+               dst_r = TMP_FREG;
+               FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_NEGD: /* XOR with the mask at sse2_buffer (single) or sse2_buffer + 8 (double) */
+               FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
+               break;
+
+       case SLJIT_ABSD: /* AND with the mask at sse2_buffer + 4 (single) or sse2_buffer + 12 (double) */
+               FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
+               break;
+       }
+
+       if (dst_r == TMP_FREG) /* result was computed in the temporary: store it to dst */
+               return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* SSE2 implementation of the two-operand floating point operations
+        * (add, subtract, multiply, divide): dst = src1 <op> src2.
+        * The first operand is placed in dst_r (dst itself when possible,
+        * TMP_FREG otherwise), the instruction then combines dst_r with src2,
+        * and a result left in TMP_FREG is stored to dst at the end.
+        */
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 1;
+#endif
+
+       if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               if (dst == src1)
+                       ; /* Do nothing here. */
+               else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) { /* NOTE(review): compares the whole 'op', so this swap is skipped when SLJIT_SINGLE_OP is set; that case falls to the TMP_FREG branch below */
+                       /* Swap arguments. */
+                       src2 = src1;
+                       src2w = src1w;
+               }
+               else if (dst != src2)
+                       FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
+               else { /* dst is src2 and the operands were not swapped: dst must not be overwritten before src2 is read */
+                       dst_r = TMP_FREG;
+                       FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+               }
+       }
+       else {
+               dst_r = TMP_FREG;
+               FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADDD:
+               FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+
+       case SLJIT_SUBD:
+               FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+
+       case SLJIT_MULD:
+               FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+
+       case SLJIT_DIVD:
+               FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+       }
+
+       if (dst_r == TMP_FREG) /* result was computed in the temporary: store it to dst */
+               return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+       return SLJIT_SUCCESS;
+}
+
+#else
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{      /* Variant compiled when SLJIT_SSE2 is not enabled: no code is emitted.
+        * Records SLJIT_ERR_UNSUPPORTED in compiler->error and returns it. */
+       CHECK_ERROR();
+       /* Should cause an assertion fail. */
+       check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+       compiler->error = SLJIT_ERR_UNSUPPORTED;
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{      /* Variant compiled when SLJIT_SSE2 is not enabled: no code is emitted.
+        * Records SLJIT_ERR_UNSUPPORTED in compiler->error and returns it. */
+       CHECK_ERROR();
+       /* Should cause an assertion fail. */
+       check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       compiler->error = SLJIT_ERR_UNSUPPORTED;
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+/* Creates a label at the current position of the instruction stream.
+   When the most recently created label already sits at the current size,
+   that label is handed back instead of allocating a new one. Failures
+   propagate through the CHECK_ERROR_PTR / PTR_FAIL_IF macros. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *new_label;
+       struct sljit_label *prev_label;
+       sljit_ub *marker;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       /* Saved flags have to be restored ahead of the label, because each
+          jump that is taken to it carries its own flags. */
+       if (SLJIT_UNLIKELY(compiler->flags_saved))
+               PTR_FAIL_IF(emit_restore_flags(compiler, 0));
+
+       /* Nothing has been emitted since the previous label: reuse it. */
+       prev_label = compiler->last_label;
+       if (prev_label && prev_label->size == compiler->size)
+               return prev_label;
+
+       new_label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!new_label);
+       set_label(new_label, compiler);
+
+       /* Two byte (0, 0) record placed in the instruction buffer. */
+       marker = (sljit_ub*)ensure_buf(compiler, 2);
+       PTR_FAIL_IF(!marker);
+       marker[0] = 0;
+       marker[1] = 0;
+
+       return new_label;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       sljit_ub *inst;
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       if (SLJIT_UNLIKELY(compiler->flags_saved)) {
+               if ((type & 0xff) <= SLJIT_JUMP)
+                       PTR_FAIL_IF(emit_restore_flags(compiler, 0));
+               compiler->flags_saved = 0;
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF_NULL(jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       if (type >= SLJIT_CALL1)
+               PTR_FAIL_IF(call_with_args(compiler, type));
+
+       /* Worst case size. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
+#else
+       compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
+#endif
+
+       inst = (sljit_ub*)ensure_buf(compiler, 2);
+       PTR_FAIL_IF_NULL(inst);
+
+       *inst++ = 0;
+       *inst++ = type + 4;
+       return jump;
+}
+
+/* Emits an indirect jump or call whose target is described by (src, srcw).
+   An immediate target (src == SLJIT_IMM) is stored in a jump record created
+   with JUMP_ADDR and a two byte (0, type + 4) record is placed in the
+   buffer. Any other operand is encoded directly as a GROUP_FF instruction
+   using CALL_rm for types from SLJIT_FAST_CALL upwards and JMP_rm otherwise.
+   Returns SLJIT_SUCCESS unless one of the FAIL_IF checks triggers. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       sljit_ub *inst;
+       struct sljit_jump *jump;
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+       /* Saved flags are only restored for types up to SLJIT_JUMP; the saved
+          state is dropped in every case. */
+       if (SLJIT_UNLIKELY(compiler->flags_saved)) {
+               if (type <= SLJIT_JUMP)
+                       FAIL_IF(emit_restore_flags(compiler, 0));
+               compiler->flags_saved = 0;
+       }
+
+       if (type >= SLJIT_CALL1) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+               /* The target is copied out of SLJIT_SCRATCH_REG3 into TMP_REG1
+                  before call_with_args runs. */
+               if (src == SLJIT_SCRATCH_REG3) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+                       src = TMP_REG1;
+               }
+               /* NOTE(review): presumably compensates for a word pushed by
+                  call_with_args for three argument calls - confirm. */
+               if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
+                       srcw += sizeof(sljit_sw);
+#endif
+#endif
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
+               /* Same copy of the target into TMP_REG1 on Win64. */
+               if (src == SLJIT_SCRATCH_REG3) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+                       src = TMP_REG1;
+               }
+#endif
+               FAIL_IF(call_with_args(compiler, type));
+       }
+
+       if (src == SLJIT_IMM) {
+               /* Immediate target: keep it in a jump record. */
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF_NULL(jump);
+               set_jump(jump, compiler, JUMP_ADDR);
+               jump->u.target = srcw;
+
+               /* Worst case size. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               compiler->size += 5;
+#else
+               compiler->size += 10 + 3;
+#endif
+
+               inst = (sljit_ub*)ensure_buf(compiler, 2);
+               FAIL_IF_NULL(inst);
+
+               *inst++ = 0;
+               *inst++ = type + 4;
+       }
+       else {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               /* REX_W is not necessary (src is not immediate). */
+               compiler->mode32 = 1;
+#endif
+               /* Register or memory target: encode it directly. */
+               inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_FF;
+               *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
+       }
+       return SLJIT_SUCCESS;
+}
+
+/* Materializes the condition selected by `type` with a setcc instruction
+   (zero extended with movzx where a full register value is built) and either
+   moves the result into dst (opcodes below SLJIT_ADD) or combines it with dst
+   through sljit_emit_op2. Nothing is emitted when dst is SLJIT_UNUSED.
+   Returns SLJIT_SUCCESS unless one of the FAIL_IF checks triggers or a
+   callee reports an error. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_ub *inst;
+       sljit_ub cond_set = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si reg;
+#else
+       /* CHECK_EXTRA_REGS might overwrite these values. */
+       sljit_si dst_save = dst;
+       sljit_sw dstw_save = dstw;
+#endif
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+       if (SLJIT_UNLIKELY(compiler->flags_saved))
+               FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
+
+       /* setcc = jcc + 0x10. */
+       cond_set = get_jump_code(type) + 0x10;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       /* OR into a register that is both source and destination, with no
+          flags requested: setcc into the low byte of TMP_REG1 and OR that
+          byte straight into dst. */
+       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
+               FAIL_IF(!inst);
+               INC_SIZE(4 + 3);
+               /* Set low register to conditional flag. */
+               *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
+               *inst++ = GROUP_0F;
+               *inst++ = cond_set;
+               *inst++ = MOD_REG | reg_lmap[TMP_REG1];
+               *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
+               *inst++ = OR_rm8_r8;
+               *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
+               return SLJIT_SUCCESS;
+       }
+
+       /* A plain SLJIT_MOV into a register is built in place; every other
+          case goes through TMP_REG1. */
+       reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
+       FAIL_IF(!inst);
+       INC_SIZE(4 + 4);
+       /* Set low register to conditional flag. */
+       *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
+       *inst++ = GROUP_0F;
+       *inst++ = cond_set;
+       *inst++ = MOD_REG | reg_lmap[reg];
+       /* movzx reg, reg (low byte) with REX_W. */
+       *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
+       *inst++ = GROUP_0F;
+       *inst++ = MOVZX_r_rm8;
+       *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
+
+       if (reg != TMP_REG1)
+               return SLJIT_SUCCESS;
+
+       /* Move opcodes: copy the value from TMP_REG1 into dst. */
+       if (GET_OPCODE(op) < SLJIT_ADD) {
+               compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
+               return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+       }
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->skip_checks = 1;
+#endif
+       /* Arithmetic opcodes: dst = dst <op> TMP_REG1. */
+       return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
+#else /* SLJIT_CONFIG_X86_64 */
+       if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
+               if (reg_map[dst] <= 4) {
+                       /* Low byte is accessible. */
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
+                       FAIL_IF(!inst);
+                       INC_SIZE(3 + 3);
+                       /* Set low byte to conditional flag. */
+                       *inst++ = GROUP_0F;
+                       *inst++ = cond_set;
+                       *inst++ = MOD_REG | reg_map[dst];
+
+                       *inst++ = GROUP_0F;
+                       *inst++ = MOVZX_r_rm8;
+                       *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
+                       return SLJIT_SUCCESS;
+               }
+
+               /* Low byte is not accessible. */
+               if (cpu_has_cmov == -1)
+                       get_cpu_features();
+
+               if (cpu_has_cmov) {
+                       /* dst = 0, then conditionally replaced by the 1 held
+                          in TMP_REG1. */
+                       EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
+                       /* a xor reg, reg operation would overwrite the flags. */
+                       EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
+
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
+                       FAIL_IF(!inst);
+                       INC_SIZE(3);
+
+                       *inst++ = GROUP_0F;
+                       /* cmovcc = setcc - 0x50. */
+                       *inst++ = cond_set - 0x50;
+                       *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
+                       return SLJIT_SUCCESS;
+               }
+
+               /* No cmov: exchange TMP_REG1 with eax, setcc al, movzx the
+                  byte into dst, then exchange back. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1 + 3 + 3 + 1);
+               *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+               /* Set al to conditional flag. */
+               *inst++ = GROUP_0F;
+               *inst++ = cond_set;
+               *inst++ = MOD_REG | 0 /* eax */;
+
+               *inst++ = GROUP_0F;
+               *inst++ = MOVZX_r_rm8;
+               *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
+               *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+               return SLJIT_SUCCESS;
+       }
+
+       /* OR into a byte addressable register that is both source and
+          destination, with no flags requested. */
+       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
+               SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
+               if (dst != SLJIT_SCRATCH_REG1) {
+                       /* eax is borrowed (exchanged with TMP_REG1) to hold
+                          the setcc result. */
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1 + 3 + 2 + 1);
+                       /* Set low register to conditional flag. */
+                       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+                       *inst++ = GROUP_0F;
+                       *inst++ = cond_set;
+                       *inst++ = MOD_REG | 0 /* eax */;
+                       *inst++ = OR_rm8_r8;
+                       *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
+                       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+               }
+               else {
+                       /* dst is eax itself, so ecx is borrowed (exchanged
+                          with TMP_REG1) instead. */
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
+                       FAIL_IF(!inst);
+                       INC_SIZE(2 + 3 + 2 + 2);
+                       /* Set low register to conditional flag. */
+                       *inst++ = XCHG_r_rm;
+                       *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
+                       *inst++ = GROUP_0F;
+                       *inst++ = cond_set;
+                       *inst++ = MOD_REG | 1 /* ecx */;
+                       *inst++ = OR_rm8_r8;
+                       *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
+                       *inst++ = XCHG_r_rm;
+                       *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* Set TMP_REG1 to the bit. */
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
+       FAIL_IF(!inst);
+       INC_SIZE(1 + 3 + 3 + 1);
+       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+       /* Set al to conditional flag. */
+       *inst++ = GROUP_0F;
+       *inst++ = cond_set;
+       *inst++ = MOD_REG | 0 /* eax */;
+
+       *inst++ = GROUP_0F;
+       *inst++ = MOVZX_r_rm8;
+       *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
+
+       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+
+       if (GET_OPCODE(op) < SLJIT_ADD)
+               return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       compiler->skip_checks = 1;
+#endif
+       /* The operands saved before CHECK_EXTRA_REGS are passed on here. */
+       return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_64 */
+}
+
+/* Stores SLJIT_LOCALS_REG plus the adjusted `offset` into dst. A zero offset
+   becomes a plain move; a non-zero offset is computed with emit_lea_binary
+   under SLJIT_KEEP_FLAGS. On x86-64 an offset that does not pass the
+   NOT_HALFWORD test is first loaded into TMP_REG1. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{
+       CHECK_ERROR();
+       check_sljit_get_local_base(compiler, dst, dstw, offset);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+#endif
+
+       /* Apply the same local offset adjustment to the requested offset. */
+       ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (NOT_HALFWORD(offset)) {
+               FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+               /* The emitting call lives inside the assertion; this branch is
+                  only compiled when SLJIT_DEBUG is enabled. */
+               SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
+               return compiler->error;
+#else
+               return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0);
+#endif
+       }
+#endif
+
+       if (offset != 0)
+               return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
+       return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
+}
+
+/* Emits a load of init_value into dst and returns the sljit_const record
+   registered for it with set_const. A two byte (0, 1) record is placed in
+   the instruction buffer after the load. On x86-64 the value is loaded with
+   emit_load_imm64 into dst when SLOW_IS_REG(dst) holds and into TMP_REG1
+   otherwise; a memory dst is then written from TMP_REG1.
+   Returns NULL when any step fails. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       sljit_ub *inst;
+       struct sljit_const *const_;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si reg;
+#endif
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+
+       if (emit_load_imm64(compiler, reg, init_value))
+               return NULL;
+#else
+       /* An unused destination still gets a load, into TMP_REG1. */
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
+       if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
+               return NULL;
+#endif
+
+       inst = (sljit_ub*)ensure_buf(compiler, 2);
+       PTR_FAIL_IF(!inst);
+
+       *inst++ = 0;
+       *inst++ = 1;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       /* The store to memory is emitted after the (0, 1) record. */
+       if (dst & SLJIT_MEM)
+               if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
+                       return NULL;
+#endif
+
+       return const_;
+}
+
+/* Overwrites the jump target stored at `addr`. On x86-32 the value written
+   is new_addr - (addr + 4), i.e. relative to the end of the four byte field;
+   in the other configuration new_addr itself is stored. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       *(sljit_sw*)addr = new_addr - (addr + 4);
+#else
+       *(sljit_uw*)addr = new_addr;
+#endif
+}
+
+/* Overwrites the sljit_sw value stored at `addr` with new_constant. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       *(sljit_sw*)addr = new_constant;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitUtils.c b/ext/pcre/pcrelib/sljit/sljitUtils.c
new file mode 100644 (file)
index 0000000..b29b403
--- /dev/null
@@ -0,0 +1,332 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ------------------------------------------------------------------------ */
+/*  Locks                                                                   */
+/* ------------------------------------------------------------------------ */
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) || (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+/* Single threaded configuration: taking the allocator lock is a no-op. */
+static SLJIT_INLINE void allocator_grab_lock(void)
+{
+       /* Always successful. */
+}
+
+/* Single threaded configuration: releasing the allocator lock is a no-op. */
+static SLJIT_INLINE void allocator_release_lock(void)
+{
+       /* Always successful. */
+}
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+/* Single threaded configuration: taking the global lock is a no-op. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+{
+       /* Always successful. */
+}
+
+/* Single threaded configuration: releasing the global lock is a no-op. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+{
+       /* Always successful. */
+}
+
+#endif /* SLJIT_UTIL_GLOBAL_LOCK */
+
+#elif defined(_WIN32) /* SLJIT_SINGLE_THREADED */
+
+#include "windows.h"
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+/* Lazily created mutex that protects the executable allocator. */
+static HANDLE allocator_mutex = 0;
+
+/* Acquires the allocator mutex, creating it on first use.
+   The handle is published with an atomic compare-and-swap so that threads
+   racing on the first call all end up waiting on the same mutex; a thread
+   that loses the race closes the surplus handle it created. */
+static SLJIT_INLINE void allocator_grab_lock(void)
+{
+       HANDLE new_mutex;
+
+       /* No idea what to do if an error occurs. Static mutexes should never fail... */
+       if (!InterlockedCompareExchangePointer((PVOID volatile *)&allocator_mutex, NULL, NULL)) {
+               /* Created unowned: ownership is taken by the wait below. */
+               new_mutex = CreateMutex(NULL, FALSE, NULL);
+               if (new_mutex && InterlockedCompareExchangePointer((PVOID volatile *)&allocator_mutex, new_mutex, NULL))
+                       CloseHandle(new_mutex);
+       }
+       WaitForSingleObject(allocator_mutex, INFINITE);
+}
+
+/* Releases the allocator mutex taken by allocator_grab_lock. */
+static SLJIT_INLINE void allocator_release_lock(void)
+{
+       ReleaseMutex(allocator_mutex);
+}
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+/* Lazily created mutex behind the public global lock. */
+static HANDLE global_mutex = 0;
+
+/* Acquires the global mutex, creating it on first use.
+   The handle is published with an atomic compare-and-swap so that threads
+   racing on the first call all end up waiting on the same mutex; a thread
+   that loses the race closes the surplus handle it created. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+{
+       HANDLE new_mutex;
+
+       /* No idea what to do if an error occurs. Static mutexes should never fail... */
+       if (!InterlockedCompareExchangePointer((PVOID volatile *)&global_mutex, NULL, NULL)) {
+               /* Created unowned: ownership is taken by the wait below. */
+               new_mutex = CreateMutex(NULL, FALSE, NULL);
+               if (new_mutex && InterlockedCompareExchangePointer((PVOID volatile *)&global_mutex, new_mutex, NULL))
+                       CloseHandle(new_mutex);
+       }
+       WaitForSingleObject(global_mutex, INFINITE);
+}
+
+/* Releases the global mutex taken by sljit_grab_lock. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+{
+       ReleaseMutex(global_mutex);
+}
+
+#endif /* SLJIT_UTIL_GLOBAL_LOCK */
+
+#else /* _WIN32 */
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+#include <pthread.h>
+
+/* Statically initialized mutex that protects the executable allocator. */
+static pthread_mutex_t allocator_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Locks allocator_mutex; the return value of pthread_mutex_lock is ignored. */
+static SLJIT_INLINE void allocator_grab_lock(void)
+{
+       pthread_mutex_lock(&allocator_mutex);
+}
+
+/* Unlocks allocator_mutex; the return value is ignored. */
+static SLJIT_INLINE void allocator_release_lock(void)
+{
+       pthread_mutex_unlock(&allocator_mutex);
+}
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+#include <pthread.h>
+
+/* Statically initialized mutex behind the public global lock. */
+static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Locks global_mutex; the return value of pthread_mutex_lock is ignored. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+{
+       pthread_mutex_lock(&global_mutex);
+}
+
+/* Unlocks global_mutex; the return value is ignored. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+{
+       pthread_mutex_unlock(&global_mutex);
+}
+
+#endif /* SLJIT_UTIL_GLOBAL_LOCK */
+
+#endif /* _WIN32 */
+
+/* ------------------------------------------------------------------------ */
+/*  Stack                                                                   */
+/* ------------------------------------------------------------------------ */
+
+#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+#ifdef _WIN32
+#include "windows.h"
+#else
+/* Provides mmap function. */
+#include <sys/mman.h>
+/* For detecting the page size. */
+#include <unistd.h>
+
+#ifndef MAP_ANON
+
+#include <fcntl.h>
+
+/* Some old systems do not have MAP_ANON. */
+static sljit_si dev_zero = -1;
+
+#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
+
+/* Opens /dev/zero read-write and stores the descriptor in dev_zero.
+   Returns non-zero when the open failed (dev_zero is negative). */
+static SLJIT_INLINE sljit_si open_dev_zero(void)
+{
+       dev_zero = open("/dev/zero", O_RDWR);
+       return dev_zero < 0;
+}
+
+#else /* SLJIT_SINGLE_THREADED */
+
+#include <pthread.h>
+
+static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Opens /dev/zero read-write (once) and stores the descriptor in dev_zero.
+   Callers test dev_zero without holding the mutex, so several threads may
+   get here at the same time; the check is repeated under the lock so that
+   only the first one opens the file and no descriptor is leaked.
+   Returns non-zero when no valid descriptor is available. */
+static SLJIT_INLINE sljit_si open_dev_zero(void)
+{
+       pthread_mutex_lock(&dev_zero_mutex);
+       /* dev_zero might have been initialized by another thread while waiting. */
+       if (dev_zero < 0)
+               dev_zero = open("/dev/zero", O_RDWR);
+       pthread_mutex_unlock(&dev_zero_mutex);
+       return dev_zero < 0;
+}
+
+#endif /* SLJIT_SINGLE_THREADED */
+
+#endif
+
+#endif
+
+#endif /* SLJIT_UTIL_STACK || SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
+
+/* Planning to make it even more clever in the future. */
+static sljit_sw sljit_page_align = 0;
+
+/* Allocates a stack descriptor together with a memory region of max_limit
+   bytes (rounded up to a whole number of pages). `limit` is the initially
+   usable size and must satisfy 1 <= limit <= max_limit. On Windows the region
+   is only reserved and the first `limit` bytes are committed through
+   sljit_stack_resize; elsewhere the whole region is mapped with mmap.
+   Returns NULL on invalid arguments or when any allocation fails. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit)
+{
+       struct sljit_stack *stack;
+       /* Lets the mapping address be read back as an integer. */
+       union {
+               void *ptr;
+               sljit_uw uw;
+       } base;
+#ifdef _WIN32
+       SYSTEM_INFO si;
+#endif
+
+       if (limit > max_limit || limit < 1)
+               return NULL;
+
+       /* sljit_page_align holds the page size minus one and is computed on
+          the first call only. */
+#ifdef _WIN32
+       if (!sljit_page_align) {
+               GetSystemInfo(&si);
+               sljit_page_align = si.dwPageSize - 1;
+       }
+#else
+       if (!sljit_page_align) {
+               sljit_page_align = sysconf(_SC_PAGESIZE);
+               /* Should never happen. */
+               if (sljit_page_align < 0)
+                       sljit_page_align = 4096;
+               sljit_page_align--;
+       }
+#endif
+
+       /* Round max_limit up to a page boundary (limit is left as given). */
+       max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
+
+       stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack));
+       if (!stack)
+               return NULL;
+
+#ifdef _WIN32
+       base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE);
+       if (!base.ptr) {
+               SLJIT_FREE(stack);
+               return NULL;
+       }
+       stack->base = base.uw;
+       stack->limit = stack->base;
+       stack->max_limit = stack->base + max_limit;
+       /* Commit the initial part; on failure the reservation and the
+          descriptor are released by sljit_free_stack. */
+       if (sljit_stack_resize(stack, stack->base + limit)) {
+               sljit_free_stack(stack);
+               return NULL;
+       }
+#else
+#ifdef MAP_ANON
+       base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+#else
+       /* Without MAP_ANON the region is a private mapping of /dev/zero. */
+       if (dev_zero < 0) {
+               if (open_dev_zero()) {
+                       SLJIT_FREE(stack);
+                       return NULL;
+               }
+       }
+       base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
+#endif
+       if (base.ptr == MAP_FAILED) {
+               SLJIT_FREE(stack);
+               return NULL;
+       }
+       stack->base = base.uw;
+       stack->limit = stack->base + limit;
+       stack->max_limit = stack->base + max_limit;
+#endif
+       stack->top = stack->base;
+       return stack;
+}
+
+#undef PAGE_ALIGN
+
+/* Releases the memory region that starts at stack->base (VirtualFree with
+   MEM_RELEASE on Windows, munmap of max_limit - base bytes elsewhere) and
+   then frees the descriptor itself with SLJIT_FREE. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack)
+{
+#ifdef _WIN32
+       VirtualFree((void*)stack->base, 0, MEM_RELEASE);
+#else
+       munmap((void*)stack->base, stack->max_limit - stack->base);
+#endif
+       SLJIT_FREE(stack);
+}
+
+/* Moves stack->limit to new_limit, which must lie between stack->base and
+   stack->max_limit (inclusive). Returns 0 on success and -1 when new_limit
+   is out of range or, on Windows, when committing or decommitting pages
+   fails. On other systems shrinking only passes the released pages to
+   madvise / posix_madvise when available; their result is ignored. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit)
+{
+       sljit_uw aligned_old_limit;
+       sljit_uw aligned_new_limit;
+
+       if ((new_limit > stack->max_limit) || (new_limit < stack->base))
+               return -1;
+#ifdef _WIN32
+       /* Both limits are rounded up to a page boundary; pages between the
+          two rounded values are committed or decommitted. */
+       aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
+       aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+       if (aligned_new_limit != aligned_old_limit) {
+               if (aligned_new_limit > aligned_old_limit) {
+                       if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE))
+                               return -1;
+               }
+               else {
+                       if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT))
+                               return -1;
+               }
+       }
+       stack->limit = new_limit;
+       return 0;
+#else
+       /* Growing (or keeping the size) only updates the limit. */
+       if (new_limit >= stack->limit) {
+               stack->limit = new_limit;
+               return 0;
+       }
+       aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
+       aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+       /* If madvise is available, we release the unnecessary space. */
+#if defined(MADV_DONTNEED)
+       if (aligned_new_limit < aligned_old_limit)
+               madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED);
+#elif defined(POSIX_MADV_DONTNEED)
+       if (aligned_new_limit < aligned_old_limit)
+               posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED);
+#endif
+       stack->limit = new_limit;
+       return 0;
+#endif
+}
+
+#endif /* SLJIT_UTIL_STACK */
+
+#endif
index 7112d84fa1549b78c44a8d8f59d75f7fa23eb9cf..ea1bfc78ac4f08a6722ab04d5c0db932f7c1dd1a 100644 (file)
@@ -1,2 +1,2 @@
-xxx
+xxx\r
 jkl
\ No newline at end of file
index e6edddc6e0bc056bdb4a5a0984159574cb50bc34..5d2bd1be52835611d7675912718f8ba4cae61cde 100644 (file)
Binary files a/ext/pcre/pcrelib/testdata/saved16BE-1 and b/ext/pcre/pcrelib/testdata/saved16BE-1 differ
index 5035ec07215ce8df53a4a46b10862d899689dba1..822ccd7012aa61c85670ca8664d70cb187d22f5d 100644 (file)
Binary files a/ext/pcre/pcrelib/testdata/saved16LE-1 and b/ext/pcre/pcrelib/testdata/saved16LE-1 differ
index b4c2ffe42cce42b7007989bcc49fce99b171f4c4..609d97cdeba0099867aa8a4cc700c0d6c4f02da7 100644 (file)
Binary files a/ext/pcre/pcrelib/testdata/saved32BE-1 and b/ext/pcre/pcrelib/testdata/saved32BE-1 differ
index 49392b89a105cd2d5438b50f7493e6c88a735673..901dfb63487c45cde0aeb6af073131a36d33728e 100644 (file)
Binary files a/ext/pcre/pcrelib/testdata/saved32LE-1 and b/ext/pcre/pcrelib/testdata/saved32LE-1 differ
index abff34e73a5c2cfeb4e6619e404778ccc34fe105..2dfb54cdfd4bbf1e7aa93dd1c6ad56c59bedc57b 100644 (file)
@@ -207,7 +207,7 @@ correctly, but that messes up comparisons). --/
     CDBABC
     \x{2000}ABC 
 
-/\R*A/SI8
+/\R*A/SI8<bsr_unicode>
     CDBABC
     \x{2028}A  
 
index 00924ee98fa92e3b4c045bbbeef1bfa27b0a740e..da6e61499cd9f078d6fea2df64f7b4fd76674dcf 100644 (file)
 
 /\U/I
 
+/a{1,3}b/U
+    ab
+
 /[/I
 
 /[a-/I
@@ -4045,4 +4048,18 @@ backtracking verbs. --/
     
 /[a[:<:]] should give error/ 
 
+/(?=ab\K)/+
+    abcd
+
+/abcd/f<lf>
+    xx\nxabcd
+    
+/ -- Test stack check external calls --/ 
+
+/(((((a)))))/Q0
+
+/(((((a)))))/Q1
+
+/(((((a)))))/Q
+
 /-- End of testinput2 --/
index ce9d9e19a40fbfa86242d8524b87c6b9dbd50e8c..067ca12fdc011b91f819d0a6c09ba351b03fca1c 100644 (file)
@@ -1,6 +1,6 @@
 /-- Tests for the 32-bit library only */
 
-< forbid 8w
+< forbid 8W
 
 /-- Check maximum character size --/
 
index 1d2e855386adf2f8eb9873557639133018cc0d35..fcd46255c936ad0657e9f32a3a0ec2784697cb61 100644 (file)
@@ -1,7 +1,10 @@
-/-- This set of tests checks local-specific features, using the fr_FR locale. 
-    It is not Perl-compatible. There is different version called wintestinput3
-  f  or use on Windows, where the locale is called "french". --/
-  
+/-- This set of tests checks local-specific features, using the "fr_FR" locale. 
+    It is not Perl-compatible. When run via RunTest, the locale is edited to
+    be whichever of "fr_FR", "french", or "fr" is found to exist. There is
+    different version of this file called wintestinput3 for use on Windows,
+    where the locale is called "french" and the tests are run using
+    RunTest.bat. --/
+
 < forbid 8W 
 
 /^[\w]+/
index 983f7a119b53f480d546ead89bdfc6cd5e8a2344..0110267bd803b66741fbdac34db1592fb5723461 100644 (file)
 /^a+[a\x{200}]/8
     aa
 
+/^.\B.\B./8
+    \x{10123}\x{10124}\x{10125}
+
+/^#[^\x{ffff}]#[^\x{ffff}]#[^\x{ffff}]#/8
+    #\x{10000}#\x{100}#\x{10ffff}#
+
 /-- End of testinput4 --/
index 9e9a22a1a1ff593b9eb4dddd83d9d8af5d81d31c..e36b09d637722acb988fdf94bd76791d1688a0ee 100644 (file)
 /^a+[a\x{200}]/8BZ
     aa
 
+/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/8BZ
+
 /-- End of testinput5 --/
index 1e450be04d34580317e31f8c3a9d253ae317c743..7a6a53f1473af2b0065c495bb0790690ccd570d0 100644 (file)
     \x{a1}\x{a7}  
     \x{37e} 
 
+/[RST]+/8iW
+    Ss\x{17f}
+    
+/[R-T]+/8iW 
+    Ss\x{17f}
+
+/[q-u]+/8iW 
+    Ss\x{17f}
+
 /-- End of testinput6 --/
index 9d1454363501b4001250a3b37899da1cfc3b6ad5..6bd0586441107fc7f88bde11a3521dcd3650a943 100644 (file)
@@ -829,4 +829,10 @@ of case for anything other than the ASCII letters. --/
 
 /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ
 
+/[RST]+/8iWBZ
+    
+/[R-T]+/8iWBZ 
+
+/[Q-U]+/8iWBZ 
+
 /-- End of testinput7 --/
index a76e2aef880cfab9995d76ac3553c063efac72eb..67ad2c8aecfa2143de9362f46237a5cb7e8043a7 100644 (file)
@@ -8,7 +8,7 @@ No options
 First char = 'a'
 Need char = 'c'
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
 JIT study was successful
 
 /(?(?C1)(?=a)a)/S+I
@@ -27,7 +27,7 @@ No options
 No first char
 No need char
 Subject length lower bound = -1
-No set of starting bytes
+No starting char list
 JIT study was not successful
 
 /abc/S+I>testsavedregex
@@ -36,7 +36,7 @@ No options
 First char = 'a'
 Need char = 'c'
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
 JIT study was successful
 Compiled pattern written to testsavedregex
 Study data written to testsavedregex
@@ -165,7 +165,7 @@ No options
 First char = 'a'
 Need char = 'd'
 Subject length lower bound = 4
-No set of starting bytes
+No starting char list
 JIT study was successful
 
 /(*NO_START_OPT)a(*:m)b/KS++
index 9f73c5000f69fb1b61728f1799401ce1469dd695..d6fb8a5ca2901e0793a0d06ada0dc95740f1fa57 100644 (file)
@@ -8,7 +8,7 @@ No options
 First char = 'a'
 Need char = 'c'
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
 JIT support is not available in this version of PCRE
 
 /a*/SI
index 52680a8f9cdb12ae26f203d38d4911de0396d50c..ae85681e0e09c7f3c7dab5e35967f3deb67a3faa 100644 (file)
@@ -361,7 +361,7 @@ Options: extended
 No first char
 No need char
 Subject length lower bound = 3
-Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 
+Starting chars: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 
   9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e 
   f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 
 
@@ -388,7 +388,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 \xa0 
+Starting chars: \x09 \x20 \xa0 
 
 /\H/SI
 Capturing subpattern count = 0
@@ -396,7 +396,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\v/SI
 Capturing subpattern count = 0
@@ -404,7 +404,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 
+Starting chars: \x0a \x0b \x0c \x0d \x85 
 
 /\V/SI
 Capturing subpattern count = 0
@@ -412,7 +412,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\R/SI
 Capturing subpattern count = 0
@@ -420,7 +420,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 
+Starting chars: \x0a \x0b \x0c \x0d \x85 
 
 /[\h]/BZ
 ------------------------------------------------------------------
index 5792be72df7f5a8a42018fc029839356d778082d..5af369d06d9334b62f454e75f62ef2bbb14c23ff 100644 (file)
@@ -481,7 +481,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
@@ -519,7 +519,7 @@ Options: utf
 First char = \x{c4}
 Need char = \x{80}
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
   \x{100}\x{100}\x{100}\x{100\x{100}
  0: \x{100}\x{100}\x{100}
 
@@ -539,7 +539,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xc4 
+Starting chars: x \xc4 
 
 /(\x{100}*a|x)/8SDZ
 ------------------------------------------------------------------
@@ -558,7 +558,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xc4 
+Starting chars: a x \xc4 
 
 /(\x{100}{0,2}a|x)/8SDZ
 ------------------------------------------------------------------
@@ -577,7 +577,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xc4 
+Starting chars: a x \xc4 
 
 /(\x{100}{1,2}a|x)/8SDZ
 ------------------------------------------------------------------
@@ -597,7 +597,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xc4 
+Starting chars: x \xc4 
 
 /\x{100}/8DZ
 ------------------------------------------------------------------
@@ -799,7 +799,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3 
+Starting chars: \x09 \x20 \xc2 \xe1 \xe2 \xe3 
     ABC\x{09}
  0: \x{09}
     ABC\x{20}
@@ -825,7 +825,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
+Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 
     ABC\x{0a}
  0: \x{0a}
     ABC\x{0b}
@@ -845,7 +845,7 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 
+Starting chars: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 
     CDBABC
  0: A
     
@@ -855,7 +855,7 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 2
-Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
+Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 
 
 /\s?xxx\s/8SI
 Capturing subpattern count = 0
@@ -863,7 +863,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 4
-Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 x 
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 x 
 
 /\sxxx\s/I8ST1
 Capturing subpattern count = 0
@@ -871,7 +871,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 5
-Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2 
+Starting chars: \x09 \x0a \x0c \x0d \x20 \xc2 
     AB\x{85}xxx\x{a0}XYZ
  0: \x{85}xxx\x{a0}
     AB\x{a0}xxx\x{85}XYZ
@@ -883,7 +883,7 @@ Options: utf
 No first char
 Need char = ' '
 Subject length lower bound = 3
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
   \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
   \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ 
   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e 
@@ -917,7 +917,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \xe1 
+Starting chars: \xe1 
 
 /\x{1234}+?/iS8I
 Capturing subpattern count = 0
@@ -925,7 +925,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \xe1 
+Starting chars: \xe1 
 
 /\x{1234}++/iS8I
 Capturing subpattern count = 0
@@ -933,7 +933,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \xe1 
+Starting chars: \xe1 
 
 /\x{1234}{2}/iS8I
 Capturing subpattern count = 0
@@ -941,7 +941,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: \xe1 
+Starting chars: \xe1 
 
 /[^\x{c4}]/8DZ
 ------------------------------------------------------------------
@@ -974,7 +974,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
+Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 
 
 /\777/8DZ
 ------------------------------------------------------------------
index 1d5f31d929a2b340fb6bbe1245ad5fba305d366c..63e9eb06ae65e61d593b2b1532b8cb8f00f91e5b 100644 (file)
@@ -64,7 +64,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 17
-Starting byte set: \xd0 \xd1 
+Starting chars: \xd0 \xd1 
     \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
  0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
     \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
@@ -92,7 +92,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 \xa0 
+Starting chars: \x09 \x20 \xa0 
 
 /\v/SI
 Capturing subpattern count = 0
@@ -100,7 +100,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 
+Starting chars: \x0a \x0b \x0c \x0d \x85 
 
 /\R/SI
 Capturing subpattern count = 0
@@ -108,7 +108,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 
+Starting chars: \x0a \x0b \x0c \x0d \x85 
 
 /[[:blank:]]/WBZ
 ------------------------------------------------------------------
index 9a469c51ae152197c0de899735fb5dfd487d910d..1a3b492fb4950023d8a38ab65cfa97ccdb099f46 100644 (file)
@@ -228,7 +228,7 @@ Options: extended
 No first char
 No need char
 Subject length lower bound = 3
-Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 
+Starting chars: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 
   9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e 
   f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff 
 
@@ -274,7 +274,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 \xa0 \xff 
+Starting chars: \x09 \x20 \xa0 \xff 
     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  0: \x{1680}\x{2000}\x{202f}\x{3000}
     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
@@ -292,7 +292,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+Starting chars: \x09 \x20 \xa0 \xff 
     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
  0: \x{1680}\x{2000}\x{202f}\x{3000}
     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
@@ -304,7 +304,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  0: \x{167f}\x{1681}\x{180d}\x{180f}
     \x{2000}\x{200a}\x{1fff}\x{200b}
@@ -330,7 +330,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
     \x{2027}\x{2030}\x{2028}\x{2029}
  0: \x{2028}\x{2029}
     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
@@ -348,7 +348,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
     \x{2027}\x{2030}\x{2028}\x{2029}
  0: \x{2028}\x{2029}
     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
@@ -360,7 +360,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
     \x{2028}\x{2029}\x{2027}\x{2030}
  0: \x{2027}\x{2030}
     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
@@ -378,7 +378,7 @@ Options: bsr_unicode
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
     \x{2027}\x{2030}\x{2028}\x{2029}
  0: \x{2028}\x{2029}
     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
@@ -534,18 +534,18 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB
 ------------------------------------------------------------------
         Bra
         a*
-        [b-\x{200}]?+
+        [b-\xff\x{100}-\x{200}]?+
         a#
         a*+
-        [b-\x{200}]?
+        [b-\xff\x{100}-\x{200}]?
         b#
-        [a-f]*
-        [g-\x{200}]*+
+        [a-f]*+
+        [g-\xff\x{100}-\x{200}]*+
         #
-        [g-\x{200}]*
+        [g-\xff\x{100}-\x{200}]*+
         [a-c]*+
         #
-        [g-\x{200}]*
+        [g-\xff\x{100}-\x{200}]*
         [a-h]*+
         Ket
         End
index 1ca9ee74018618ddd25d360adf9541cca1ba3919..a19620570502ae91ff9849a087eb1f2e1ba59deb 100644 (file)
@@ -339,7 +339,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
@@ -378,7 +378,7 @@ Options: utf
 First char = \x{100}
 Need char = \x{100}
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
   \x{100}\x{100}\x{100}\x{100\x{100}
  0: \x{100}\x{100}\x{100}
 
@@ -398,7 +398,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xff 
+Starting chars: x \xff 
 
 /(\x{100}*a|x)/8SDZ
 ------------------------------------------------------------------
@@ -417,7 +417,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xff 
+Starting chars: a x \xff 
 
 /(\x{100}{0,2}a|x)/8SDZ
 ------------------------------------------------------------------
@@ -436,7 +436,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xff 
+Starting chars: a x \xff 
 
 /(\x{100}{1,2}a|x)/8SDZ
 ------------------------------------------------------------------
@@ -456,7 +456,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xff 
+Starting chars: x \xff 
 
 /\x{100}/8DZ
 ------------------------------------------------------------------
@@ -666,7 +666,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 \xa0 \xff 
+Starting chars: \x09 \x20 \xa0 \xff 
     ABC\x{09}
  0: \x{09}
     ABC\x{20}
@@ -692,7 +692,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
     ABC\x{0a}
  0: \x{0a}
     ABC\x{0b}
@@ -712,19 +712,19 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 A \xa0 \xff 
+Starting chars: \x09 \x20 A \xa0 \xff 
     CDBABC
  0: A
     \x{2000}ABC 
  0: \x{2000}A
 
-/\R*A/SI8
+/\R*A/SI8<bsr_unicode>
 Capturing subpattern count = 0
-Options: utf
+Options: bsr_unicode utf
 No first char
 Need char = 'A'
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d A \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d A \x85 \xff 
     CDBABC
  0: A
     \x{2028}A  
@@ -736,7 +736,7 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 2
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
 
 /\s?xxx\s/8SI
 Capturing subpattern count = 0
@@ -744,7 +744,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 4
-Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 x 
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 x 
 
 /\sxxx\s/I8ST1
 Capturing subpattern count = 0
@@ -752,7 +752,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 5
-Starting byte set: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 
+Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 
     AB\x{85}xxx\x{a0}XYZ
  0: \x{85}xxx\x{a0}
     AB\x{a0}xxx\x{85}XYZ
@@ -764,7 +764,7 @@ Options: utf
 No first char
 Need char = ' '
 Subject length lower bound = 3
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
   \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
   \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ 
   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e 
@@ -803,7 +803,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\x{1234}+?/iS8I
 Capturing subpattern count = 0
@@ -811,7 +811,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\x{1234}++/iS8I
 Capturing subpattern count = 0
@@ -819,7 +819,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\x{1234}{2}/iS8I
 Capturing subpattern count = 0
@@ -827,7 +827,7 @@ Options: caseless utf
 First char = \x{1234}
 Need char = \x{1234}
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 /[^\x{c4}]/8DZ
 ------------------------------------------------------------------
@@ -860,7 +860,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
 
 /-- Check bad offset --/
 
index 89be3a4b0519d8a02749c82973968f06236ad04d..1525994db98a34e9e3a07372bda7c1b99faeffa9 100644 (file)
@@ -337,7 +337,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
@@ -376,7 +376,7 @@ Options: utf
 First char = \x{100}
 Need char = \x{100}
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
   \x{100}\x{100}\x{100}\x{100\x{100}
  0: \x{100}\x{100}\x{100}
 
@@ -396,7 +396,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xff 
+Starting chars: x \xff 
 
 /(\x{100}*a|x)/8SDZ
 ------------------------------------------------------------------
@@ -415,7 +415,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xff 
+Starting chars: a x \xff 
 
 /(\x{100}{0,2}a|x)/8SDZ
 ------------------------------------------------------------------
@@ -434,7 +434,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a x \xff 
+Starting chars: a x \xff 
 
 /(\x{100}{1,2}a|x)/8SDZ
 ------------------------------------------------------------------
@@ -454,7 +454,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x \xff 
+Starting chars: x \xff 
 
 /\x{100}/8DZ
 ------------------------------------------------------------------
@@ -663,7 +663,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 \xa0 \xff 
+Starting chars: \x09 \x20 \xa0 \xff 
     ABC\x{09}
  0: \x{09}
     ABC\x{20}
@@ -689,7 +689,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
     ABC\x{0a}
  0: \x{0a}
     ABC\x{0b}
@@ -709,19 +709,19 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 1
-Starting byte set: \x09 \x20 A \xa0 \xff 
+Starting chars: \x09 \x20 A \xa0 \xff 
     CDBABC
  0: A
     \x{2000}ABC 
  0: \x{2000}A
 
-/\R*A/SI8
+/\R*A/SI8<bsr_unicode>
 Capturing subpattern count = 0
-Options: utf
+Options: bsr_unicode utf
 No first char
 Need char = 'A'
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d A \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d A \x85 \xff 
     CDBABC
  0: A
     \x{2028}A  
@@ -733,7 +733,7 @@ Options: utf
 No first char
 Need char = 'A'
 Subject length lower bound = 2
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
 
 /\s?xxx\s/8SI
 Capturing subpattern count = 0
@@ -741,7 +741,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 4
-Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 x 
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 x 
 
 /\sxxx\s/I8ST1
 Capturing subpattern count = 0
@@ -749,7 +749,7 @@ Options: utf
 No first char
 Need char = 'x'
 Subject length lower bound = 5
-Starting byte set: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 
+Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 
     AB\x{85}xxx\x{a0}XYZ
  0: \x{85}xxx\x{a0}
     AB\x{a0}xxx\x{85}XYZ
@@ -761,7 +761,7 @@ Options: utf
 No first char
 Need char = ' '
 Subject length lower bound = 3
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
   \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
   \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ 
   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e 
@@ -800,7 +800,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\x{1234}+?/iS8I
 Capturing subpattern count = 0
@@ -808,7 +808,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\x{1234}++/iS8I
 Capturing subpattern count = 0
@@ -816,7 +816,7 @@ Options: caseless utf
 First char = \x{1234}
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /\x{1234}{2}/iS8I
 Capturing subpattern count = 0
@@ -824,7 +824,7 @@ Options: caseless utf
 First char = \x{1234}
 Need char = \x{1234}
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 /[^\x{c4}]/8DZ
 ------------------------------------------------------------------
@@ -857,7 +857,7 @@ Options: utf
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 
+Starting chars: \x0a \x0b \x0c \x0d \x85 \xff 
 
 /-- Check bad offset --/
 
index ccc198cc153076376e4e59ed0716e3927d5b9ddf..21fe677900b2d24b0cd80863323db86e2f34f033 100644 (file)
@@ -55,7 +55,7 @@ Options: caseless utf
 First char = \x{401} (caseless)
 Need char = \x{42f} (caseless)
 Subject length lower bound = 17
-No set of starting bytes
+No starting char list
     \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
  0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
     \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
index 844497abcdcea57069c95bd59bb970e54114c69b..b6da7df187ee5e83f22907e288941cbfaf92b1f3 100644 (file)
@@ -178,7 +178,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 3
-Starting byte set: c d e 
+Starting chars: c d e 
     this sentence eventually mentions a cat
  0: cat
     this sentences rambles on and on for a while and then reaches elephant
@@ -190,7 +190,7 @@ Options: caseless
 No first char
 No need char
 Subject length lower bound = 3
-Starting byte set: C D E c d e 
+Starting chars: C D E c d e 
     this sentence eventually mentions a CAT cat
  0: CAT
     this sentences rambles on and on for a while to elephant ElePhant
@@ -202,7 +202,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /(a|[^\dZ])/IS
 Capturing subpattern count = 1
@@ -210,7 +210,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 
   ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y [ \ ] ^ _ ` a b c d 
@@ -231,7 +231,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 a b 
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 a b 
 
 /(ab\2)/
 Failed: reference to non-existent subpattern at offset 6
@@ -512,7 +512,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /(?i)[abcd]/IS
 Capturing subpattern count = 0
@@ -520,7 +520,7 @@ Options: caseless
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: A B C D a b c d 
+Starting chars: A B C D a b c d 
 
 /(?m)[xy]|(b|c)/IS
 Capturing subpattern count = 1
@@ -528,7 +528,7 @@ Options: multiline
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: b c x y 
+Starting chars: b c x y 
 
 /(^a|^b)/Im
 Capturing subpattern count = 1
@@ -591,7 +591,7 @@ No options
 First char = 'b' (caseless)
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /(a*b|(?i:c*(?-i)d))/IS
 Capturing subpattern count = 1
@@ -599,7 +599,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: C a b c d 
+Starting chars: C a b c d 
 
 /a$/I
 Capturing subpattern count = 0
@@ -666,7 +666,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b 
+Starting chars: a b 
 
 /(?<!foo)(alpha|omega)/IS
 Capturing subpattern count = 1
@@ -675,7 +675,7 @@ No options
 No first char
 Need char = 'a'
 Subject length lower bound = 5
-Starting byte set: a o 
+Starting chars: a o 
 
 /(?!alphabet)[ab]/IS
 Capturing subpattern count = 0
@@ -683,7 +683,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b 
+Starting chars: a b 
 
 /(?<=foo\n)^bar/Im
 Capturing subpattern count = 0
@@ -1642,7 +1642,7 @@ Options: anchored
 No first char
 Need char = 'd'
 Subject length lower bound = 4
-No set of starting bytes
+No starting char list
 
 /\(             # ( at start
   (?:           # Non-capturing bracket
@@ -1875,7 +1875,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
+Starting chars: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
   _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
 
 /^[[:ascii:]]/DZ
@@ -1937,7 +1937,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 
 
 /^[[:cntrl:]]/DZ
 ------------------------------------------------------------------
@@ -3178,6 +3178,10 @@ Failed: PCRE does not support \L, \l, \N{name}, \U, or \u at offset 1
 /\U/I
 Failed: PCRE does not support \L, \l, \N{name}, \U, or \u at offset 1
 
+/a{1,3}b/U
+    ab
+ 0: ab
+
 /[/I
 Failed: missing terminating ] for character class at offset 1
 
@@ -3434,7 +3438,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b 
+Starting chars: a b 
 
 /[^a]/I
 Capturing subpattern count = 0
@@ -3454,7 +3458,7 @@ No options
 No first char
 Need char = '6'
 Subject length lower bound = 4
-Starting byte set: 0 1 2 3 4 5 6 7 8 9 
+Starting chars: 0 1 2 3 4 5 6 7 8 9 
 
 /a^b/I
 Capturing subpattern count = 0
@@ -3488,7 +3492,7 @@ Options: caseless
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: A B a b 
+Starting chars: A B a b 
 
 /[ab](?i)cd/IS
 Capturing subpattern count = 0
@@ -3496,7 +3500,7 @@ No options
 No first char
 Need char = 'd' (caseless)
 Subject length lower bound = 3
-Starting byte set: a b 
+Starting chars: a b 
 
 /abc(?C)def/I
 Capturing subpattern count = 0
@@ -3537,7 +3541,7 @@ No options
 No first char
 Need char = 'f'
 Subject length lower bound = 7
-Starting byte set: 0 1 2 3 4 5 6 7 8 9 
+Starting chars: 0 1 2 3 4 5 6 7 8 9 
     1234abcdef
 --->1234abcdef
   1 ^              \d
@@ -3856,7 +3860,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b 
+Starting chars: a b 
 
 /(?R)/I
 Failed: recursive call could loop indefinitely at offset 3
@@ -4637,7 +4641,7 @@ Options: caseless
 No first char
 Need char = 'g' (caseless)
 Subject length lower bound = 8
-No set of starting bytes
+No starting char list
      Baby Bjorn Active Carrier - With free SHIPPING!!
  0: Baby Bjorn Active Carrier - With free SHIPPING!!
  1: Baby Bjorn Active Carrier - With free SHIPPING!!
@@ -4656,7 +4660,7 @@ No options
 No first char
 Need char = 'b'
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /(a|b)*.?c/ISDZ
 ------------------------------------------------------------------
@@ -4677,7 +4681,7 @@ No options
 No first char
 Need char = 'c'
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /abc(?C255)de(?C)f/DZ
 ------------------------------------------------------------------
@@ -4750,7 +4754,7 @@ Options:
 No first char
 Need char = 'b'
 Subject length lower bound = 1
-Starting byte set: a b 
+Starting chars: a b 
   ab
 --->ab
  +0 ^      a*
@@ -4893,7 +4897,7 @@ Options:
 No first char
 Need char = 'x'
 Subject length lower bound = 4
-Starting byte set: a d 
+Starting chars: a d 
   abcx
 --->abcx
  +0 ^        (abc|def)
@@ -5127,7 +5131,7 @@ Options:
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b x 
+Starting chars: a b x 
     Note: that { does NOT introduce a quantifier
 --->Note: that { does NOT introduce a quantifier
  +0         ^                                        ([ab]{,4}c|xy)
@@ -5607,7 +5611,7 @@ No options
 First char = 'a'
 Need char = 'c'
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
 Compiled pattern written to testsavedregex
 Study data written to testsavedregex
 <testsavedregex
@@ -5642,7 +5646,7 @@ No options
 First char = 'a'
 Need char = 'c'
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
 Compiled pattern written to testsavedregex
 Study data written to testsavedregex
 <testsavedregex
@@ -5677,7 +5681,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b 
+Starting chars: a b 
 Compiled pattern written to testsavedregex
 Study data written to testsavedregex
 <testsavedregex
@@ -5716,7 +5720,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b 
+Starting chars: a b 
 Compiled pattern written to testsavedregex
 Study data written to testsavedregex
 <testsavedregex
@@ -6431,7 +6435,7 @@ No options
 No first char
 Need char = ','
 Subject length lower bound = 1
-Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 , 
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 , 
     \x0b,\x0b
  0: \x0b,\x0b
     \x0c,\x0d
@@ -6738,7 +6742,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: C a b c d 
+Starting chars: C a b c d 
 
 /()[ab]xyz/IS
 Capturing subpattern count = 1
@@ -6746,7 +6750,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 4
-Starting byte set: a b 
+Starting chars: a b 
 
 /(|)[ab]xyz/IS
 Capturing subpattern count = 1
@@ -6754,7 +6758,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 4
-Starting byte set: a b 
+Starting chars: a b 
 
 /(|c)[ab]xyz/IS
 Capturing subpattern count = 1
@@ -6762,7 +6766,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 4
-Starting byte set: a b c 
+Starting chars: a b c 
 
 /(|c?)[ab]xyz/IS
 Capturing subpattern count = 1
@@ -6770,7 +6774,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 4
-Starting byte set: a b c 
+Starting chars: a b c 
 
 /(d?|c?)[ab]xyz/IS
 Capturing subpattern count = 1
@@ -6778,7 +6782,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 4
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /(d?|c)[ab]xyz/IS
 Capturing subpattern count = 1
@@ -6786,7 +6790,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 4
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /^a*b\d/DZ
 ------------------------------------------------------------------
@@ -6879,7 +6883,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /(a+|b*)[cd]/IS
 Capturing subpattern count = 1
@@ -6887,7 +6891,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /(a*|b+)[cd]/IS
 Capturing subpattern count = 1
@@ -6895,7 +6899,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /(a+|b+)[cd]/IS
 Capturing subpattern count = 1
@@ -6903,7 +6907,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b 
+Starting chars: a b 
 
 /((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((
  ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((
@@ -9307,7 +9311,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: x y z 
+Starting chars: x y z 
 
 /(?(?=.*b)b|^)/CI
 Capturing subpattern count = 0
@@ -10096,7 +10100,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b 
+Starting chars: a b 
 
 /(a|bc)\1{2,3}/SI
 Capturing subpattern count = 1
@@ -10105,7 +10109,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 3
-Starting byte set: a b 
+Starting chars: a b 
 
 /(a|bc)(?1)/SI
 Capturing subpattern count = 1
@@ -10113,7 +10117,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b 
+Starting chars: a b 
 
 /(a|b\1)(a|b\1)/SI
 Capturing subpattern count = 2
@@ -10122,7 +10126,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b 
+Starting chars: a b 
 
 /(a|b\1){2}/SI
 Capturing subpattern count = 1
@@ -10131,7 +10135,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b 
+Starting chars: a b 
 
 /(a|bbbb\1)(a|bbbb\1)/SI
 Capturing subpattern count = 2
@@ -10140,7 +10144,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b 
+Starting chars: a b 
 
 /(a|bbbb\1){2}/SI
 Capturing subpattern count = 1
@@ -10149,7 +10153,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 2
-Starting byte set: a b 
+Starting chars: a b 
 
 /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/SI
 Capturing subpattern count = 1
@@ -10157,7 +10161,7 @@ Options: anchored
 No first char
 Need char = ':'
 Subject length lower bound = 22
-No set of starting bytes
+No starting char list
 
 /<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/isIS
 Capturing subpattern count = 11
@@ -10165,7 +10169,7 @@ Options: caseless dotall
 First char = '<'
 Need char = '>'
 Subject length lower bound = 47
-No set of starting bytes
+No starting char list
 
 "(?>.*/)foo"SI
 Capturing subpattern count = 0
@@ -10173,7 +10177,7 @@ No options
 No first char
 Need char = 'o'
 Subject length lower bound = 4
-No set of starting bytes
+No starting char list
 
 /(?(?=[^a-z]+[a-z])  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} ) /xSI
 Capturing subpattern count = 0
@@ -10181,7 +10185,7 @@ Options: extended
 No first char
 Need char = '-'
 Subject length lower bound = 8
-No set of starting bytes
+No starting char list
 
 /(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/iSI
 Capturing subpattern count = 1
@@ -10189,7 +10193,7 @@ Options: caseless
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: A B C a b c 
+Starting chars: A B C a b c 
 
 /(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/SI
 Capturing subpattern count = 0
@@ -10197,7 +10201,7 @@ No options
 No first char
 Need char = 'b'
 Subject length lower bound = 41
-Starting byte set: c d 
+Starting chars: c d 
 
 /<a[\s]+href[\s]*=[\s]*          # find <a href=
  ([\"\'])?                       # find single or double quote
@@ -10210,7 +10214,7 @@ Options: caseless extended dotall
 First char = '<'
 Need char = '='
 Subject length lower bound = 9
-No set of starting bytes
+No starting char list
 
 /^(?!:)                       # colon disallowed at start
   (?:                         # start of item
@@ -10226,7 +10230,7 @@ Options: anchored caseless extended
 No first char
 Need char = ':'
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 /(?|(?<a>A)|(?<a>B))/I
 Capturing subpattern count = 1
@@ -10450,7 +10454,7 @@ Options:
 No first char
 Need char = 'a'
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
     cat
  0: a
  1: 
@@ -10464,7 +10468,7 @@ No options
 No first char
 Need char = 'a'
 Subject length lower bound = 3
-No set of starting bytes
+No starting char list
     cat
 No match
 
@@ -10476,7 +10480,7 @@ No options
 First char = 'i'
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
     i
  0: i
     
@@ -10486,7 +10490,7 @@ No options
 No first char
 Need char = 'i'
 Subject length lower bound = 1
-Starting byte set: i 
+Starting chars: i 
     ia
  0: ia
  1: 
@@ -11080,7 +11084,7 @@ No options
 First char = 'a'
 Need char = '4'
 Subject length lower bound = 5
-No set of starting bytes
+No starting char list
 
 /([abc])++1234/SI
 Capturing subpattern count = 1
@@ -11088,7 +11092,7 @@ No options
 No first char
 Need char = '4'
 Subject length lower bound = 5
-Starting byte set: a b c 
+Starting chars: a b c 
 
 /(?<=(abc)+)X/
 Failed: lookbehind assertion is not fixed length at offset 10
@@ -11369,7 +11373,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/SI
 Capturing subpattern count = 2
@@ -11377,7 +11381,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 3
-Starting byte set: a b 
+Starting chars: a b 
 
 /(a(?2)|b)(b(?1)|a)(?1)(?2)/SI
 Capturing subpattern count = 2
@@ -11385,7 +11389,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 4
-Starting byte set: a b 
+Starting chars: a b 
 
 /(abc)(?1)/SI
 Capturing subpattern count = 1
@@ -11393,7 +11397,7 @@ No options
 First char = 'a'
 Need char = 'c'
 Subject length lower bound = 6
-No set of starting bytes
+No starting char list
 
 /^(?>a)++/
     aa\M
@@ -11711,7 +11715,7 @@ No options
 First char = 't'
 Need char = 't'
 Subject length lower bound = 18
-No set of starting bytes
+No starting char list
 
 /\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|<input\b.*?\btype\b\W*?\bimage\b|\bonkeyup\b\W*?\=/IS
 Capturing subpattern count = 0
@@ -11720,7 +11724,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 8
-Starting byte set: < o t u 
+Starting chars: < o t u 
 
 /a(*SKIP)c|b(*ACCEPT)|/+S!I
 Capturing subpattern count = 0
@@ -11729,7 +11733,7 @@ No options
 No first char
 No need char
 Subject length lower bound = -1
-No set of starting bytes
+No starting char list
     a
  0: 
  0+ 
@@ -11740,7 +11744,7 @@ No options
 No first char
 No need char
 Subject length lower bound = -1
-Starting byte set: a b x 
+Starting chars: a b x 
     ax
  0: x
 
@@ -12436,7 +12440,7 @@ No options
 No first char
 No need char
 Subject length lower bound = -1
-No set of starting bytes
+No starting char list
 
 /(?:(a)+(?C1)bb|aa(?C2)b)/
     aab\C+
@@ -12722,7 +12726,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 2
-Starting byte set: a z 
+Starting chars: a z 
     aaaaaaaaaaaaaz
 Error -21 (recursion limit exceeded)
     aaaaaaaaaaaaaz\Q1000
@@ -12735,7 +12739,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 2
-Starting byte set: a z 
+Starting chars: a z 
     aaaaaaaaaaaaaz
 Error -21 (recursion limit exceeded)
 
@@ -12746,7 +12750,7 @@ No options
 No first char
 Need char = 'z'
 Subject length lower bound = 2
-Starting byte set: a z 
+Starting chars: a z 
     aaaaaaaaaaaaaz
 No match
     aaaaaaaaaaaaaz\Q10
@@ -12790,7 +12794,7 @@ Options: dupnames
 First char = 'a'
 Need char = 'z'
 Subject length lower bound = 5
-No set of starting bytes
+No starting char list
 
 /a*[bcd]/BZ
 ------------------------------------------------------------------
@@ -13902,7 +13906,7 @@ No options
 No first char
 Need char = 'd'
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /[a-c]+d/DZS
 ------------------------------------------------------------------
@@ -13917,7 +13921,7 @@ No options
 No first char
 Need char = 'd'
 Subject length lower bound = 2
-Starting byte set: a b c 
+Starting chars: a b c 
 
 /[a-c]?d/DZS
 ------------------------------------------------------------------
@@ -13932,7 +13936,7 @@ No options
 No first char
 Need char = 'd'
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /[a-c]{4,6}d/DZS
 ------------------------------------------------------------------
@@ -13947,7 +13951,7 @@ No options
 No first char
 Need char = 'd'
 Subject length lower bound = 5
-Starting byte set: a b c 
+Starting chars: a b c 
 
 /[a-c]{0,6}d/DZS
 ------------------------------------------------------------------
@@ -13962,7 +13966,7 @@ No options
 No first char
 Need char = 'd'
 Subject length lower bound = 1
-Starting byte set: a b c d 
+Starting chars: a b c d 
 
 /-- End of special auto-possessive tests --/
 
@@ -14125,4 +14129,24 @@ No match
 /[a[:<:]] should give error/ 
 Failed: unknown POSIX class name at offset 4
 
+/(?=ab\K)/+
+    abcd
+Start of matched string is beyond its end - displaying from end to start.
+ 0: ab
+ 0+ abcd
+
+/abcd/f<lf>
+    xx\nxabcd
+No match
+    
+/ -- Test stack check external calls --/ 
+
+/(((((a)))))/Q0
+
+/(((((a)))))/Q1
+Failed: parentheses are too deeply nested (stack check) at offset 0
+
+/(((((a)))))/Q
+** Missing 0 or 1 after /Q
+
 /-- End of testinput2 --/
index 0e21350f891e1c368ab04e56a3a8b8ce1dd2ef79..da194d90e0ba255b0f767dee038062326444cba2 100644 (file)
@@ -50,7 +50,7 @@ Options: anchored extended
 No first char
 No need char
 Subject length lower bound = 6
-No set of starting bytes
+No starting char list
 
 <!testsaved16BE-1
 Compiled pattern loaded from testsaved16BE-1
@@ -83,7 +83,7 @@ Options: anchored extended
 No first char
 No need char
 Subject length lower bound = 6
-No set of starting bytes
+No starting char list
 
 <!testsaved32LE-1
 Compiled pattern loaded from testsaved32LE-1
index 183487aca13e10a7b8388bdc1904e72b5d34a993..d087bb6f4dd494ca59aaa0f937a2a6637d416158 100644 (file)
@@ -62,7 +62,7 @@ Options: anchored extended
 No first char
 No need char
 Subject length lower bound = 6
-No set of starting bytes
+No starting char list
 
 <!testsaved32BE-1
 Compiled pattern loaded from testsaved32BE-1
@@ -95,6 +95,6 @@ Options: anchored extended
 No first char
 No need char
 Subject length lower bound = 6
-No set of starting bytes
+No starting char list
 
 /-- End of testinput21 --/
index f896b13e18a77a162d15cbec48ca8cc6b14c446e..32a71cd44384a2e7084a6e05624d1b27fc5a5e9c 100644 (file)
@@ -37,7 +37,7 @@ Options: extended utf
 No first char
 No need char
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 <!testsaved16BE-2
 Compiled pattern loaded from testsaved16BE-2
@@ -64,7 +64,7 @@ Options: extended utf
 No first char
 No need char
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 <!testsaved32LE-2
 Compiled pattern loaded from testsaved32LE-2
index 783926b821015115b456754f02c14b2a3fecf38b..13e441d159c748b38c9c65b68b921a4f92a5a707 100644 (file)
@@ -49,7 +49,7 @@ Options: extended utf
 No first char
 No need char
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 <!testsaved32BE-2
 Compiled pattern loaded from testsaved32BE-2
@@ -76,6 +76,6 @@ Options: extended utf
 No first char
 No need char
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 /-- End of testinput22 --/
index 6f5384c34e8c00f886c710234b4b3a86f3141085..6dabf03b0faddad7fb842e0766751d5ec2fccb69 100644 (file)
@@ -18,7 +18,7 @@ Failed: character value in \x{} or \o{} is too large at offset 8
 /[\H]/BZSI
 ------------------------------------------------------------------
         Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
+        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
         Ket
         End
 ------------------------------------------------------------------
@@ -27,12 +27,25 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b 
+  \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a 
+  \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 
+  : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ 
+  _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 
+  \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f 
+  \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e 
+  \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae 
+  \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd 
+  \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc 
+  \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb 
+  \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea 
+  \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 
+  \xfa \xfb \xfc \xfd \xfe \xff 
 
 /[\V]/BZSI
 ------------------------------------------------------------------
         Bra
-        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
+        [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
         Ket
         End
 ------------------------------------------------------------------
@@ -41,6 +54,19 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e 
+  \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
+  \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > 
+  ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 
+  d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 
+  \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 
+  \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 
+  \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 
+  \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf 
+  \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce 
+  \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd 
+  \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec 
+  \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb 
+  \xfc \xfd \xfe \xff 
 
 /-- End of testinput23 --/
index 7ad3378368ff10ce894566df8497a68d5820e688..4c62c8d8079c518fd5193c5bfcdaaf2225e386f5 100644 (file)
@@ -1,6 +1,6 @@
 /-- Tests for the 32-bit library only */
 
-< forbid 8w
+< forbid 8W
 
 /-- Check maximum character size --/
 
@@ -65,7 +65,7 @@ Need char = \x{800000}
 /[\H]/BZSI
 ------------------------------------------------------------------
         Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
+        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
         Ket
         End
 ------------------------------------------------------------------
@@ -74,12 +74,25 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b 
+  \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a 
+  \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 
+  : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ 
+  _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 
+  \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f 
+  \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e 
+  \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae 
+  \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd 
+  \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc 
+  \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb 
+  \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea 
+  \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 
+  \xfa \xfb \xfc \xfd \xfe \xff 
 
 /[\V]/BZSI
 ------------------------------------------------------------------
         Bra
-        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffffffff}]
+        [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}]
         Ket
         End
 ------------------------------------------------------------------
@@ -88,6 +101,19 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e 
+  \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
+  \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > 
+  ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 
+  d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 
+  \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 
+  \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 
+  \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 
+  \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf 
+  \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce 
+  \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd 
+  \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec 
+  \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb 
+  \xfc \xfd \xfe \xff 
 
 /-- End of testinput25 --/
index 12ffc9911b63e79ff89d5c4b2bfd051ca4b268d6..73119ab4b7bd4749e3d8f725f5c6c6cc2979b400 100644 (file)
@@ -1,7 +1,10 @@
-/-- This set of tests checks local-specific features, using the fr_FR locale. 
-    It is not Perl-compatible. There is different version called wintestinput3
-  f  or use on Windows, where the locale is called "french". --/
-  
+/-- This set of tests checks local-specific features, using the "fr_FR" locale. 
+    It is not Perl-compatible. When run via RunTest, the locale is edited to
+    be whichever of "fr_FR", "french", or "fr" is found to exist. There is
+    different version of this file called wintestinput3 for use on Windows,
+    where the locale is called "french" and the tests are run using
+    RunTest.bat. --/
+
 < forbid 8W 
 
 /^[\w]+/
@@ -90,7 +93,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
+Starting chars: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
   Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
 
 /\w/ISLfr_FR
@@ -99,7 +102,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
+Starting chars: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
   Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
   ª µ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â 
   ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ 
index 0dbec4eccab9689a0dcb59067f21fe5e9d7fd835..dcf13b08507e89d5b66377b7f0e749d5d588bef4 100644 (file)
@@ -1263,4 +1263,12 @@ No match
     aa
  0: aa
 
+/^.\B.\B./8
+    \x{10123}\x{10124}\x{10125}
+ 0: \x{10123}\x{10124}\x{10125}
+
+/^#[^\x{ffff}]#[^\x{ffff}]#[^\x{ffff}]#/8
+    #\x{10000}#\x{100}#\x{10ffff}#
+ 0: #\x{10000}#\x{100}#\x{10ffff}#
+
 /-- End of testinput4 --/
index 3fa581052e6b23dc0b254849ea9d8e4664deb19e..5c098e650ba7e8ee2391cfc6e5fa4ab145429d13 100644 (file)
@@ -270,7 +270,7 @@ No match
 /[z-\x{100}]/8DZ
 ------------------------------------------------------------------
         Bra
-        [z-\x{100}]
+        [z-\xff\x{100}]
         Ket
         End
 ------------------------------------------------------------------
@@ -812,7 +812,7 @@ No match
 /[\H]/8BZ
 ------------------------------------------------------------------
         Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]
+        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]
         Ket
         End
 ------------------------------------------------------------------
@@ -820,7 +820,7 @@ No match
 /[\V]/8BZ
 ------------------------------------------------------------------
         Bra
-        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}]
+        [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}]
         Ket
         End
 ------------------------------------------------------------------
@@ -1536,7 +1536,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /[^\x{1234}]+?/iS8I   
 Capturing subpattern count = 0
@@ -1544,7 +1544,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /[^\x{1234}]++/iS8I   
 Capturing subpattern count = 0
@@ -1552,7 +1552,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 1
-No set of starting bytes
+No starting char list
 
 /[^\x{1234}]{2}/iS8I
 Capturing subpattern count = 0
@@ -1560,7 +1560,7 @@ Options: caseless utf
 No first char
 No need char
 Subject length lower bound = 2
-No set of starting bytes
+No starting char list
 
 //<bsr_anycrlf><bsr_unicode>
 Failed: inconsistent NEWLINE options at offset 0
@@ -1620,7 +1620,7 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
 /[\H\x{d7ff}]+/8BZ
 ------------------------------------------------------------------
         Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++
+        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++
         Ket
         End
 ------------------------------------------------------------------
@@ -1660,7 +1660,7 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7
 /[\V\x{d7ff}]+/8BZ
 ------------------------------------------------------------------
         Bra
-        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++
+        [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++
         Ket
         End
 ------------------------------------------------------------------
@@ -1882,4 +1882,19 @@ Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 5
     aa
  0: aa
 
+/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/8BZ
+------------------------------------------------------------------
+        Bra
+        [b-d\x{200}-\x{250}]*+
+        [ae-h]?+
+        #
+        [\x{200}-\x{250}]{0,8}+
+        [\x00-\xff]*
+        #
+        [\x{200}-\x{250}]++
+        [a-z]
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput5 --/
index 6c42fce1a5ba7574db4f8c7c993bef24a9049622..f355e601383d8420b0011220739f8abe8572408a 100644 (file)
@@ -2445,4 +2445,16 @@ No match
     \x{37e} 
 No match
 
+/[RST]+/8iW
+    Ss\x{17f}
+ 0: Ss\x{17f}
+    
+/[R-T]+/8iW 
+    Ss\x{17f}
+ 0: Ss\x{17f}
+
+/[q-u]+/8iW 
+    Ss\x{17f}
+ 0: Ss\x{17f}
+
 /-- End of testinput6 --/
index 45ac72fd8d4031f50ae6d5c67a8d8811717873d2..c64e049942117b131ca87fa30d15196b2e574c4e 100644 (file)
@@ -124,7 +124,7 @@ No match
 /[z-\x{100}]/8iDZ 
 ------------------------------------------------------------------
         Bra
-        [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}]
+        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
         Ket
         End
 ------------------------------------------------------------------
@@ -162,7 +162,7 @@ No match
 /[z-\x{100}]/8DZi
 ------------------------------------------------------------------
         Bra
-        [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}]
+        [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
         Ket
         End
 ------------------------------------------------------------------
@@ -2263,4 +2263,28 @@ No match
         End
 ------------------------------------------------------------------
 
+/[RST]+/8iWBZ
+------------------------------------------------------------------
+        Bra
+        [R-Tr-t\x{17f}]++
+        Ket
+        End
+------------------------------------------------------------------
+    
+/[R-T]+/8iWBZ 
+------------------------------------------------------------------
+        Bra
+        [R-Tr-t\x{17f}]++
+        Ket
+        End
+------------------------------------------------------------------
+
+/[Q-U]+/8iWBZ 
+------------------------------------------------------------------
+        Bra
+        [Q-Uq-u\x{17f}]++
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput7 --/
index bb68d3e64525945ba44e4d7561a99c5cf05cdb53..3861ea41fdb764a78ae7388ad849e9b1369d6a6a 100644 (file)
@@ -7232,7 +7232,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 3
-Starting byte set: a d x 
+Starting chars: a d x 
     terhjk;abcdaadsfe
  0: abc
     the quick xyz brown fox 
index 00880070670a3695ee9ed4bbb5e2ed11b161c82f..456ad196b56454a16672ad4694b618f4dd275521 100644 (file)
@@ -84,7 +84,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
+Starting chars: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
   Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
 
 /\w/ISLfrench
@@ -93,7 +93,7 @@ No options
 No first char
 No need char
 Subject length lower bound = 1
-Starting byte set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
+Starting chars: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
   Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
   \83 \8a \8c \8e \9a \9c \9e \9f ª ² ³ µ ¹ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö 
   Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý 
index 2d39b6ea385e5a7fd216a09a857dfaeaed050ee9..a16b4fbe71895494610c41acf032b42a2443e40d 100644 (file)
@@ -1,6 +1,7 @@
 --TEST--
 preg_replace() with array of failing regular expressions
 --INI--
+pcre.jit=0
 pcre.backtrack_limit=100000
 --FILE--
 <?php
index 7dee7ba4e7236e640e7b0337d74ecc4e2ca7faee..294931388d6fcf31ae3c492ac76b9c2a81b17bf0 100644 (file)
@@ -7,6 +7,7 @@ if (@preg_match_all('/\p{N}/', '0123456789', $dummy) === false) {
 }
 ?>
 --INI--
+pcre.jit=0
 pcre.recursion_limit=2
 --FILE--
 <?php