granicus.if.org Git - php/commitdiff
Upgrade to PCRE 8.37 due to various bugfixes
author     Stanislav Malyshev <stas@php.net>
Thu, 30 Apr 2015 05:25:02 +0000 (22:25 -0700)
committer  Stanislav Malyshev <stas@php.net>
Thu, 30 Apr 2015 05:27:07 +0000 (22:27 -0700)
36 files changed:
NEWS
ext/pcre/config.w32
ext/pcre/config0.m4
ext/pcre/pcrelib/AUTHORS
ext/pcre/pcrelib/ChangeLog
ext/pcre/pcrelib/LICENCE
ext/pcre/pcrelib/NEWS
ext/pcre/pcrelib/README
ext/pcre/pcrelib/config.h
ext/pcre/pcrelib/pcre_compile.c
ext/pcre/pcrelib/pcre_exec.c
ext/pcre/pcrelib/pcre_internal.h
ext/pcre/pcrelib/pcre_jit_compile.c [new file with mode: 0644]
ext/pcre/pcrelib/pcre_study.c
ext/pcre/pcrelib/sljit/sljitConfig.h [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitConfigInternal.h [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitExecAllocator.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitLir.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitLir.h [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeARM_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeARM_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeMIPS_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeMIPS_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativePPC_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativePPC_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativePPC_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeSPARC_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeTILEGX-encoder.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeX86_32.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeX86_64.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitNativeX86_common.c [new file with mode: 0644]
ext/pcre/pcrelib/sljit/sljitUtils.c [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index 0d897561d7cf6c5361986fba34153ae8058cb425..811f66ddf55d7213b623da86208da8cd5bcf8423 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -3,7 +3,7 @@ PHP                                                                        NEWS
 ?? ??? 2015 PHP 5.4.41
 
 - PCRE
-  . Upgraded pcrelib to 8.36.
+  . Upgraded pcrelib to 8.37.
 
 16 Apr 2015 PHP 5.4.40
 
index 8279f0a3318c1dda31bf78ffade5bfd3b265347e..594b1cb474907aea0bd1dab5e644f1dd89e42b9a 100644 (file)
@@ -3,7 +3,7 @@
 
 EXTENSION("pcre", "php_pcre.c", false /* never shared */,
                "-Iext/pcre/pcrelib");
-ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
+ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c pcre_jit_compile.c", "pcre");
 ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
 
 AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');
index 4f8af76e63ab0d5de3e0cba0e08e52b36b844885..bfe2009aa097690ae61821c4343278aba4d81ec6 100644 (file)
@@ -58,7 +58,8 @@ PHP_ARG_WITH(pcre-regex,,
                                 pcrelib/pcre_maketables.c pcrelib/pcre_newline.c \
                                 pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c \
                                 pcrelib/pcre_tables.c pcrelib/pcre_valid_utf8.c \
-                                pcrelib/pcre_version.c pcrelib/pcre_xclass.c"
+                                pcrelib/pcre_version.c pcrelib/pcre_xclass.c \
+                                pcrelib/pcre_jit_compile.c"
     PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib"
     PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS)
     PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
index 5eee1af4c6fd252594ccc538d121c13e4ac5e431..d33723f198a836d094709c868dce2ec6bab2de0b 100644 (file)
@@ -8,7 +8,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2014 University of Cambridge
+Copyright (c) 1997-2015 University of Cambridge
 All rights reserved
 
 
@@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2014 Zoltan Herczeg
+Copyright(c) 2010-2015 Zoltan Herczeg
 All rights reserved.
 
 
@@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2014 Zoltan Herczeg
+Copyright(c) 2009-2015 Zoltan Herczeg
 All rights reserved.
 
 
index 8abdfb5f117e29c907342652cb39e791d0ddcea7..359b412958220e7981bef8041998892418e85f70 100644 (file)
@@ -1,6 +1,173 @@
 ChangeLog for PCRE
 ------------------
 
+Version 8.37 28-April-2015
+--------------------------
+
+1.  When an (*ACCEPT) is triggered inside capturing parentheses, it arranges
+    for those parentheses to be closed with whatever has been captured so far.
+    However, it was failing to mark any other groups between the highest
+    capture so far and the current group as "unset". Thus, the ovector for
+    those groups contained whatever was previously there. An example is the
+    pattern /(x)|((*ACCEPT))/ when matched against "abcd".
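
As an illustration, a minimal sketch against the PCRE1 C API (error handling
omitted): with the bug, the ovector slots for the unset group 1 could hold
stale values, whereas the fixed library reports them as -1.

  #include <stdio.h>
  #include <pcre.h>

  int main(void)
  {
  const char *err; int erroffset;
  pcre *re = pcre_compile("(x)|((*ACCEPT))", 0, &err, &erroffset, NULL);
  int ov[9];                              /* room for two capture pairs */
  int rc = pcre_exec(re, NULL, "abcd", 4, 0, 0, ov, 9);
  /* Fixed behaviour: rc == 3 and the unset group 1 is (-1,-1). */
  printf("rc=%d group1=(%d,%d)\n", rc, ov[2], ov[3]);
  return 0;
  }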
+
+2.  If an assertion condition was quantified with a minimum of zero (an odd
+    thing to do, but it happened), SIGSEGV or other misbehaviour could occur.
+
+3.  If a pattern in pcretest input had the P (POSIX) modifier followed by an
+    unrecognized modifier, a crash could occur.
+
+4.  An attempt to do global matching in pcretest with a zero-length ovector
+    caused a crash.
+
+5.  Fixed a memory leak during matching that could occur for a subpattern
+    subroutine call (recursive or otherwise) if the number of captured groups
+    that had to be saved was greater than ten.
+
+6.  Catch a bad opcode during auto-possessification after compiling a bad UTF
+    string with NO_UTF_CHECK. This is a tidyup, not a bug fix, as passing bad
+    UTF with NO_UTF_CHECK is documented as having an undefined outcome.
+
+7.  A UTF pattern containing a "not" match of a non-ASCII character and a
+    subroutine reference could loop at compile time. Example: /[^\xff]((?1))/.
+
+8. When a pattern is compiled, it remembers the highest back reference so that
+   when matching, if the ovector is too small, extra memory can be obtained to
+   use instead. A conditional subpattern whose condition is a check on a
+   capture having happened, such as, for example in the pattern
+   /^(?:(a)|b)(?(1)A|B)/, is another kind of back reference, but it was not
+   setting the highest backreference number. This mattered only if pcre_exec()
+   was called with an ovector that was too small to hold the capture, and there
+   was no other kind of back reference (a situation which is probably quite
+   rare). The effect of the bug was that the condition was always treated as
+   FALSE when the capture could not be consulted, leading to incorrect
+   behaviour by pcre_exec(). This bug has been fixed.
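
A hedged sketch of the triggering situation (PCRE1 C API, error handling
omitted, hypothetical helper name): the ovector below has room only for the
overall match, so group 1 cannot be returned to the caller and the (?(1)...)
condition has to be decided from the library's internally obtained memory.

  #include <pcre.h>

  int condition_demo(void)
  {
  const char *err; int erroffset;
  pcre *re = pcre_compile("^(?:(a)|b)(?(1)A|B)", 0, &err, &erroffset, NULL);
  int ov[3];                     /* too small to hold the capture */
  return pcre_exec(re, NULL, "aA", 2, 0, 0, ov, 3);   /* fixed: >= 0 */
  }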
+
+9. A reference to a duplicated named group (either a back reference or a test
+   for being set in a conditional) that occurred in a part of the pattern where
+   PCRE_DUPNAMES was not set caused the amount of memory needed for the pattern
+   to be incorrectly calculated, leading to overwriting.
+
+10. A mutually recursive set of back references such as (\2)(\1) caused a
+    segfault at study time (while trying to find the minimum matching length).
+    The infinite loop is now broken (with the minimum length unset, that is,
+    zero).
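
The study-time path can be exercised directly; a minimal sketch (PCRE1 C API,
error handling omitted, hypothetical helper name):

  #include <pcre.h>

  pcre_extra *study_demo(void)
  {
  const char *err; int erroffset;
  pcre *re = pcre_compile("(\\2)(\\1)", 0, &err, &erroffset, NULL);
  /* Before the fix, computing the minimum matching length here could
     segfault; it now terminates with the minimum length left unset. */
  return (re != NULL) ? pcre_study(re, 0, &err) : NULL;
  }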
+
+11. If an assertion that was used as a condition was quantified with a minimum
+    of zero, matching went wrong. In particular, if the whole group had
+    unlimited repetition and could match an empty string, a segfault was
+    likely. The pattern (?(?=0)?)+ is an example that caused this. Perl allows
+    assertions to be quantified, but not if they are being used as conditions,
+    so the above pattern is faulted by Perl. PCRE has now been changed so that
+    it also rejects such patterns.
+
+12. A possessive capturing group such as (a)*+ with a minimum repeat of zero
+    failed to allow the zero-repeat case if pcre_exec() was called with an
+    ovector too small to capture the group.
+
+13. Fixed two bugs in pcretest that were discovered by fuzzing and reported by
+    Red Hat Product Security:
+
+    (a) A crash if /K and /F were both set with the option to save the compiled
+    pattern.
+
+    (b) Another crash if the option to print captured substrings in a callout
+    was combined with setting a null ovector, for example \O\C+ as a subject
+    string.
+
+14. A pattern such as "((?2){0,1999}())?", which has a group containing a
+    forward reference repeated a large (but limited) number of times within a
+    repeated outer group that has a zero minimum quantifier, caused incorrect
+    code to be compiled, leading to the error "internal error:
+    previously-checked referenced subpattern not found" when an incorrect
+    memory address was read. This bug was reported as "heap overflow",
+    discovered by Kai Lu of Fortinet's FortiGuard Labs and given the CVE number
+    CVE-2015-2325.
+
+23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine
+    call within a group that also contained a recursive back reference caused
+    incorrect code to be compiled. This bug was reported as "heap overflow",
+    discovered by Kai Lu of Fortinet's FortiGuard Labs, and given the CVE
+    number CVE-2015-2326.
+
+24. Computing the size of the JIT read-only data in advance has been a source
+    of various issues, and new ones unfortunately still appear. To fix
+    existing and future issues, size computation is eliminated from the code,
+    and replaced by on-demand memory allocation.
+
+25. A pattern such as /(?i)[A-`]/, where characters in the other case were
+    adjacent to the end of the range and the range contained characters with
+    more than one other case, caused incorrect behaviour when compiled in UTF
+    mode. In that example, the range a-j was left out of the class.
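
A sketch of the symptom (PCRE1 C API, UTF-8 mode, error handling omitted,
hypothetical helper name):

  #include <pcre.h>

  int caseless_range_demo(void)
  {
  const char *err; int erroffset;
  pcre *re = pcre_compile("(?i)[A-`]", PCRE_UTF8, &err, &erroffset, NULL);
  int ov[3];
  /* Before the fix, "a" failed to match in UTF mode because a-j was
     missing from the compiled class; fixed builds return >= 0 here. */
  return pcre_exec(re, NULL, "a", 1, 0, 0, ov, 3);
  }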
+
+26. Fix JIT compilation of conditional blocks whose assertion is converted
+    to (*FAIL), e.g. /(?(?!))/.
+
+27. The pattern /(?(?!)^)/ caused references to random memory. This bug was
+    discovered by the LLVM fuzzer.
+
+28. The assertion (?!) is optimized to (*FAIL). This was not handled correctly
+    when this assertion was used as a condition, for example (?(?!)a|b). In
+    pcre_exec() it worked by luck; in pcre_dfa_exec() it gave an incorrect
+    error about an unsupported item.
+
+29. For some types of pattern, for example /Z*(|d*){216}/, the auto-
+    possessification code could take exponential time to complete. A recursion
+    depth limit of 1000 has been imposed to limit the resources used by this
+    optimization.
+
+30. In a pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
+    such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored
+    because \S ensures they are all in the class. The code for doing this was
+    interacting badly with the code for computing the amount of space needed to
+    compile the pattern, leading to a buffer overflow. This bug was discovered
+    by the LLVM fuzzer.
+
+31. A pattern such as /((?2)+)((?1))/ which has mutual recursion nested inside
+    other kinds of group caused stack overflow at compile time. This bug was
+    discovered by the LLVM fuzzer.
+
+32. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment
+    between a subroutine call and its quantifier was incorrectly compiled,
+    leading to buffer overflow or other errors. This bug was discovered by the
+    LLVM fuzzer.
+
+33. The illegal pattern /(?(?<E>.*!.*)?)/ was not being diagnosed as missing an
+    assertion after (?(. The code was failing to check the character after
+    (?(?< for the ! or = that would indicate a lookbehind assertion. This bug
+    was discovered by the LLVM fuzzer.
+
+34. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with
+    a fixed maximum following a group that contains a subroutine reference was
+    incorrectly compiled and could trigger buffer overflow. This bug was
+    discovered by the LLVM fuzzer.
+
+35. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1)))
+    caused a stack overflow instead of the diagnosis of a non-fixed length
+    lookbehind assertion. This bug was discovered by the LLVM fuzzer.
+
+36. The use of \K in a positive lookbehind assertion in a non-anchored pattern
+    (e.g. /(?<=\Ka)/) could make pcregrep loop.
+
+37. There was a similar problem to 36 in pcretest for global matches.
+
+38. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
+    and a subsequent item in the pattern caused a non-match, backtracking over
+    the repeated \X did not stop, but carried on past the start of the subject,
+    causing reference to random memory and/or a segfault. There were also some
+    other cases where backtracking after \C could crash. This set of bugs was
+    discovered by the LLVM fuzzer.
+
+39. The function for finding the minimum length of a matching string could take
+    a very long time if mutual recursion was present many times in a pattern,
+    for example, /((?2){73}(?2))((?1))/. A better mutual recursion detection
+    method has been implemented. This infelicity was discovered by the LLVM
+    fuzzer.
+
+40. Static linking against the PCRE library using the pkg-config module was
+    failing on missing pthread symbols.
+
+
 Version 8.36 26-September-2014
 ------------------------------
 
index 602e4ae680467f9db3260f16ae0f65fcfa01544a..9f6f98e477f474520aad41ac52c54797ca614ad2 100644 (file)
@@ -6,7 +6,8 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
 specified below. The documentation for PCRE, supplied in the "doc"
-directory, is distributed under the same terms as the software itself.
+directory, is distributed under the same terms as the software itself. The data
+in the testdata directory is not copyrighted and is in the public domain.
 
 The basic library functions are written in C and are freestanding. Also
 included in the distribution is a set of C++ wrapper functions, and a
@@ -24,7 +25,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2014 University of Cambridge
+Copyright (c) 1997-2015 University of Cambridge
 All rights reserved.
 
 
@@ -35,7 +36,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2014 Zoltan Herczeg
+Copyright(c) 2010-2015 Zoltan Herczeg
 All rights reserved.
 
 
@@ -46,7 +47,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2014 Zoltan Herczeg
+Copyright(c) 2009-2015 Zoltan Herczeg
 All rights reserved.
 
 
index 5b8c60c14b878d0fcff98dd0f4034516a397e889..064bf27819c95cbf2f6af80db3b2fed87aa31f79 100644 (file)
@@ -1,6 +1,14 @@
 News about PCRE releases
 ------------------------
 
+Release 8.37 28-April-2015
+--------------------------
+
+This is a bug-fix release. Note that this library (now called PCRE1) is being
+maintained for bug fixes only. New projects are advised to use the new PCRE2
+libraries.
+
+
 Release 8.36 26-September-2014
 ------------------------------
 
index e30bd0fd5b7273f593fd81fcc2e072cb10b5de35..4887ebf350e7e6f7881b54f0e1e0077d1091adda 100644 (file)
@@ -1,7 +1,16 @@
 README file for PCRE (Perl-compatible regular expression library)
 -----------------------------------------------------------------
 
-The latest release of PCRE is always available in three alternative formats
+NOTE: This set of files relates to PCRE releases that use the original API,
+with library names libpcre, libpcre16, and libpcre32. January 2015 saw the
+first release of a new API, known as PCRE2, with release numbers starting at
+10.00 and library names libpcre2-8, libpcre2-16, and libpcre2-32. The old
+libraries (now called PCRE1) are still being maintained for bug fixes, but
+there will be no new development. New projects are advised to use the new PCRE2
+libraries.
+
+
+The latest release of PCRE1 is always available in three alternative formats
 from:
 
   ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
@@ -990,4 +999,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 24 October 2014
+Last updated: 10 February 2015
index 878ddc81ebcdb6cdf9bc82e194d9c97842a9ce4e..8e7cac93b4e08d49cfbea10c3869e7d87da3b0fb 100644 (file)
@@ -395,7 +395,7 @@ them both to 0; an emulation function will be used. */
 #undef SUPPORT_GCOV
 
 /* Define to any value to enable support for Just-In-Time compiling. */
-#undef SUPPORT_JIT
+#define SUPPORT_JIT
 
 /* Define to any value to allow pcregrep to be linked with libbz2, so that it
    is able to handle .bz2 files. */
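
With SUPPORT_JIT now defined in the bundled config.h, JIT availability can be
verified at run time through the long-standing pcre_config() call; a minimal
sketch:

  #include <stdio.h>
  #include <pcre.h>

  int main(void)
  {
  int jit = 0;
  if (pcre_config(PCRE_CONFIG_JIT, &jit) == 0)
    printf("PCRE JIT compiled in: %s\n", jit ? "yes" : "no");
  return 0;
  }
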
index efc0b21fd14bebb311c449bb0a00fde6ce2e18aa..0efad2645d9d0164b56c3bd279348299997969bc 100644 (file)
@@ -1704,6 +1704,7 @@ Arguments:
   utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
   atend    TRUE if called when the pattern is complete
   cd       the "compile data" structure
+  recurses    chain of recurse_check to catch mutual recursion
 
 Returns:   the fixed length,
              or -1 if there is no fixed length,
@@ -1713,10 +1714,11 @@ Returns:   the fixed length,
 */
 
 static int
-find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)
+find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd,
+  recurse_check *recurses)
 {
 int length = -1;
-
+recurse_check this_recurse;
 register int branchlength = 0;
 register pcre_uchar *cc = code + 1 + LINK_SIZE;
 
@@ -1741,7 +1743,8 @@ for (;;)
     case OP_ONCE:
     case OP_ONCE_NC:
     case OP_COND:
-    d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);
+    d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd,
+      recurses);
     if (d < 0) return d;
     branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -1775,7 +1778,15 @@ for (;;)
     cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
     do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
     if (cc > cs && cc < ce) return -1;                    /* Recursion */
-    d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);
+    else   /* Check for mutual recursion */
+      {
+      recurse_check *r = recurses;
+      for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+      if (r != NULL) return -1;   /* Mutual recursion */
+      }
+    this_recurse.prev = recurses;
+    this_recurse.group = cs;
+    d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd, &this_recurse);
     if (d < 0) return d;
     branchlength += d;
     cc += 1 + LINK_SIZE;
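
The new recurses argument threads one recurse_check node per active call
through the C stack, so a group that is already being measured is recognised
and the walk stops. A simplified, self-contained sketch of the same technique
(illustrative names, not the PCRE internals):

  #include <stddef.h>

  typedef struct seen_link {
    struct seen_link *prev;
    const void *group;
  } seen_link;

  static int walk_group(const void *group, seen_link *callers)
  {
  seen_link this_level, *p;
  for (p = callers; p != NULL; p = p->prev)
    if (p->group == group) return -1;   /* mutual recursion detected */
  this_level.prev = callers;
  this_level.group = group;
  /* ... recurse into referenced groups, passing &this_level ... */
  return 0;
  }
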
@@ -2129,32 +2140,60 @@ for (;;)
       {
       case OP_CHAR:
       case OP_CHARI:
+      case OP_NOT:
+      case OP_NOTI:
       case OP_EXACT:
       case OP_EXACTI:
+      case OP_NOTEXACT:
+      case OP_NOTEXACTI:
       case OP_UPTO:
       case OP_UPTOI:
+      case OP_NOTUPTO:
+      case OP_NOTUPTOI:
       case OP_MINUPTO:
       case OP_MINUPTOI:
+      case OP_NOTMINUPTO:
+      case OP_NOTMINUPTOI:
       case OP_POSUPTO:
       case OP_POSUPTOI:
+      case OP_NOTPOSUPTO:
+      case OP_NOTPOSUPTOI:
       case OP_STAR:
       case OP_STARI:
+      case OP_NOTSTAR:
+      case OP_NOTSTARI:
       case OP_MINSTAR:
       case OP_MINSTARI:
+      case OP_NOTMINSTAR:
+      case OP_NOTMINSTARI:
       case OP_POSSTAR:
       case OP_POSSTARI:
+      case OP_NOTPOSSTAR:
+      case OP_NOTPOSSTARI:
       case OP_PLUS:
       case OP_PLUSI:
+      case OP_NOTPLUS:
+      case OP_NOTPLUSI:
       case OP_MINPLUS:
       case OP_MINPLUSI:
+      case OP_NOTMINPLUS:
+      case OP_NOTMINPLUSI:
       case OP_POSPLUS:
       case OP_POSPLUSI:
+      case OP_NOTPOSPLUS:
+      case OP_NOTPOSPLUSI:
       case OP_QUERY:
       case OP_QUERYI:
+      case OP_NOTQUERY:
+      case OP_NOTQUERYI:
       case OP_MINQUERY:
       case OP_MINQUERYI:
+      case OP_NOTMINQUERY:
+      case OP_NOTMINQUERYI:
       case OP_POSQUERY:
       case OP_POSQUERYI:
+      case OP_NOTPOSQUERY:
+      case OP_NOTPOSQUERYI:
       if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
       break;
       }
@@ -2334,11 +2373,6 @@ Arguments:
 Returns:      TRUE if what is matched could be empty
 */
 
-typedef struct recurse_check {
-  struct recurse_check *prev;
-  const pcre_uchar *group;
-} recurse_check;
-
 static BOOL
 could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
   BOOL utf, compile_data *cd, recurse_check *recurses)
@@ -2469,8 +2503,8 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
       empty_branch = FALSE;
       do
         {
-        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
-          empty_branch = TRUE;
+        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
+          recurses)) empty_branch = TRUE;
         code += GET(code, 1);
         }
       while (*code == OP_ALT);
@@ -3065,7 +3099,7 @@ Returns:      TRUE if the auto-possessification is possible
 
 static BOOL
 compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
-  const pcre_uint32 *base_list, const pcre_uchar *base_end)
+  const pcre_uint32 *base_list, const pcre_uchar *base_end, int *rec_limit)
 {
 pcre_uchar c;
 pcre_uint32 list[8];
@@ -3082,6 +3116,9 @@ pcre_uint32 chr;
 BOOL accepted, invert_bits;
 BOOL entered_a_group = FALSE;
 
+if (*rec_limit == 0) return FALSE;
+--(*rec_limit);
+
 /* Note: the base_list[1] contains whether the current opcode has greedy
 (represented by a non-zero value) quantifier. This differs from
 other character type lists, which store here that the character iterator
@@ -3152,7 +3189,8 @@ for(;;)
 
     while (*next_code == OP_ALT)
       {
-      if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
+      if (!compare_opcodes(code, utf, cd, base_list, base_end, rec_limit))
+        return FALSE;
       code = next_code + 1 + LINK_SIZE;
       next_code += GET(next_code, 1);
       }
@@ -3172,7 +3210,7 @@ for(;;)
     /* The bracket content will be checked by the
     OP_BRA/OP_CBRA case above. */
     next_code += 1 + LINK_SIZE;
-    if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
+    if (!compare_opcodes(next_code, utf, cd, base_list, base_end, rec_limit))
       return FALSE;
 
     code += PRIV(OP_lengths)[c];
@@ -3605,11 +3643,20 @@ register pcre_uchar c;
 const pcre_uchar *end;
 pcre_uchar *repeat_opcode;
 pcre_uint32 list[8];
+int rec_limit;
 
 for (;;)
   {
   c = *code;
 
+  /* When a pattern with bad UTF-8 encoding is compiled with NO_UTF_CHECK,
+  it may compile without complaining, but may get into a loop here if the code
+  pointer points to a bad value. This is, of course, a documented possibility
+  when NO_UTF_CHECK is set, so it isn't a bug, but we can detect this case and
+  just give up on this optimization. */
+
+  if (c >= OP_TABLE_LENGTH) return;
+
   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
     {
     c -= get_repeat_base(c) - OP_STAR;
@@ -3617,7 +3664,8 @@ for (;;)
       get_chr_property_list(code, utf, cd->fcc, list) : NULL;
     list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
 
-    if (end != NULL && compare_opcodes(end, utf, cd, list, end))
+    rec_limit = 1000;
+    if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))
       {
       switch(c)
         {
@@ -3673,7 +3721,8 @@ for (;;)
 
       list[1] = (c & 1) == 0;
 
-      if (compare_opcodes(end, utf, cd, list, end))
+      rec_limit = 1000;
+      if (compare_opcodes(end, utf, cd, list, end, &rec_limit))
         {
         switch (c)
           {
@@ -3947,14 +3996,14 @@ Arguments:
   adjust     the amount by which the group is to be moved
   utf        TRUE in UTF-8 / UTF-16 / UTF-32 mode
   cd         contains pointers to tables etc.
-  save_hwm   the hwm forward reference pointer at the start of the group
+  save_hwm_offset   the hwm forward reference offset at the start of the group
 
 Returns:     nothing
 */
 
 static void
 adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
-  pcre_uchar *save_hwm)
+  size_t save_hwm_offset)
 {
 pcre_uchar *ptr = group;
 
@@ -3966,7 +4015,8 @@ while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
   /* See if this recursion is on the forward reference list. If so, adjust the
   reference. */
 
-  for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
+  for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
+       hc += LINK_SIZE)
     {
     offset = (int)GET(hc, 0);
     if (cd->start_code + offset == ptr + 1)
@@ -4171,7 +4221,11 @@ if ((options & PCRE_CASELESS) != 0)
       range. Otherwise, use a recursive call to add the additional range. */
 
       else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
-      else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */
+      else if (od > end && oc <= end + 1)
+        {
+        end = od;       /* Extend upwards */
+        if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
+        }
       else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);
       }
     }
@@ -4411,7 +4465,7 @@ const pcre_uchar *tempptr;
 const pcre_uchar *nestptr = NULL;
 pcre_uchar *previous = NULL;
 pcre_uchar *previous_callout = NULL;
-pcre_uchar *save_hwm = NULL;
+size_t save_hwm_offset = 0;
 pcre_uint8 classbits[32];
 
 /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
@@ -5470,6 +5524,12 @@ for (;; ptr++)
       PUT(previous, 1, (int)(code - previous));
       break;   /* End of class handling */
       }
+
+    /* Even though any XCLASS list is now discarded, we must allow for
+    its memory. */
+
+    if (lengthptr != NULL)
+      *lengthptr += (int)(class_uchardata - class_uchardata_base);
 #endif
 
     /* If there are no characters > 255, or they are all to be included or
@@ -5870,6 +5930,7 @@ for (;; ptr++)
       {
       register int i;
       int len = (int)(code - previous);
+      size_t base_hwm_offset = save_hwm_offset;
       pcre_uchar *bralink = NULL;
       pcre_uchar *brazeroptr = NULL;
 
@@ -5924,7 +5985,7 @@ for (;; ptr++)
         if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
           {
           *code = OP_END;
-          adjust_recurse(previous, 1, utf, cd, save_hwm);
+          adjust_recurse(previous, 1, utf, cd, save_hwm_offset);
           memmove(previous + 1, previous, IN_UCHARS(len));
           code++;
           if (repeat_max == 0)
@@ -5948,7 +6009,7 @@ for (;; ptr++)
           {
           int offset;
           *code = OP_END;
-          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);
+          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);
           memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
           code += 2 + LINK_SIZE;
           *previous++ = OP_BRAZERO + repeat_type;
@@ -6011,26 +6072,25 @@ for (;; ptr++)
             for (i = 1; i < repeat_min; i++)
               {
               pcre_uchar *hc;
-              pcre_uchar *this_hwm = cd->hwm;
+              size_t this_hwm_offset = cd->hwm - cd->start_workspace;
               memcpy(code, previous, IN_UCHARS(len));
 
               while (cd->hwm > cd->start_workspace + cd->workspace_size -
-                     WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+                     WORK_SIZE_SAFETY_MARGIN -
+                     (this_hwm_offset - base_hwm_offset))
                 {
-                size_t save_offset = save_hwm - cd->start_workspace;
-                size_t this_offset = this_hwm - cd->start_workspace;
                 *errorcodeptr = expand_workspace(cd);
                 if (*errorcodeptr != 0) goto FAILED;
-                save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
-                this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
                 }
 
-              for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+              for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
+                   hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
+                   hc += LINK_SIZE)
                 {
                 PUT(cd->hwm, 0, GET(hc, 0) + len);
                 cd->hwm += LINK_SIZE;
                 }
-              save_hwm = this_hwm;
+              base_hwm_offset = this_hwm_offset;
               code += len;
               }
             }
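
The common theme of these hunks: expand_workspace() may move
cd->start_workspace (it reallocates), so raw pointers such as save_hwm and
this_hwm could be left dangling, whereas offsets remain valid across the
move. A small illustrative sketch of the hazard, with hypothetical names:

  #include <stdlib.h>

  /* Illustration only, not PCRE code: keep positions into a growable
     buffer as offsets, because realloc() may move the block. */
  static int offset_demo(void)
  {
  size_t size = 64;
  char *ws = malloc(size);
  if (ws == NULL) return -1;
  char *hwm = ws + 10;             /* raw high-water-mark pointer */
  size_t hwm_offset = hwm - ws;    /* ... and its offset form */
  char *bigger = realloc(ws, size * 2);
  if (bigger == NULL) { free(ws); return -1; }
  ws = bigger;                     /* old 'hwm' may now dangle */
  hwm = ws + hwm_offset;           /* the offset stays valid */
  (void)hwm;
  free(ws);
  return 0;
  }
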
@@ -6075,7 +6135,7 @@ for (;; ptr++)
         else for (i = repeat_max - 1; i >= 0; i--)
           {
           pcre_uchar *hc;
-          pcre_uchar *this_hwm = cd->hwm;
+          size_t this_hwm_offset = cd->hwm - cd->start_workspace;
 
           *code++ = OP_BRAZERO + repeat_type;
 
@@ -6097,22 +6157,21 @@ for (;; ptr++)
           copying them. */
 
           while (cd->hwm > cd->start_workspace + cd->workspace_size -
-                 WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+                 WORK_SIZE_SAFETY_MARGIN -
+                 (this_hwm_offset - base_hwm_offset))
             {
-            size_t save_offset = save_hwm - cd->start_workspace;
-            size_t this_offset = this_hwm - cd->start_workspace;
             *errorcodeptr = expand_workspace(cd);
             if (*errorcodeptr != 0) goto FAILED;
-            save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
-            this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
             }
 
-          for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+          for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
+               hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
+               hc += LINK_SIZE)
             {
             PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
             cd->hwm += LINK_SIZE;
             }
-          save_hwm = this_hwm;
+          base_hwm_offset = this_hwm_offset;
           code += len;
           }
 
@@ -6208,7 +6267,7 @@ for (;; ptr++)
               {
               int nlen = (int)(code - bracode);
               *code = OP_END;
-              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);
+              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
               memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
               code += 1 + LINK_SIZE;
               nlen += 1 + LINK_SIZE;
@@ -6342,7 +6401,7 @@ for (;; ptr++)
         else
           {
           *code = OP_END;
-          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
           memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
           code += 1 + LINK_SIZE;
           len += 1 + LINK_SIZE;
@@ -6391,7 +6450,7 @@ for (;; ptr++)
 
         default:
         *code = OP_END;
-        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
         memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
         code += 1 + LINK_SIZE;
         len += 1 + LINK_SIZE;
@@ -6420,15 +6479,25 @@ for (;; ptr++)
     parenthesis forms.  */
 
     case CHAR_LEFT_PARENTHESIS:
-    newoptions = options;
-    skipbytes = 0;
-    bravalue = OP_CBRA;
-    save_hwm = cd->hwm;
-    reset_bracount = FALSE;
+    ptr++;
 
-    /* First deal with various "verbs" that can be introduced by '*'. */
+    /* First deal with comments. Putting this code right at the start ensures
+    that comments have no bad side effects. */
+
+    if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
+      {
+      ptr += 2;
+      while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+      if (*ptr == CHAR_NULL)
+        {
+        *errorcodeptr = ERR18;
+        goto FAILED;
+        }
+      continue;
+      }
+
+    /* Now deal with various "verbs" that can be introduced by '*'. */
 
-    ptr++;
     if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
          || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0))))
       {
@@ -6549,10 +6618,18 @@ for (;; ptr++)
       goto FAILED;
       }
 
+    /* Initialize for "real" parentheses */
+
+    newoptions = options;
+    skipbytes = 0;
+    bravalue = OP_CBRA;
+    save_hwm_offset = cd->hwm - cd->start_workspace;
+    reset_bracount = FALSE;
+
     /* Deal with the extended parentheses; all are introduced by '?', and the
     appearance of any of them means that this is not a capturing group. */
 
-    else if (*ptr == CHAR_QUESTION_MARK)
+    if (*ptr == CHAR_QUESTION_MARK)
       {
       int i, set, unset, namelen;
       int *optset;
@@ -6561,17 +6638,6 @@ for (;; ptr++)
 
       switch (*(++ptr))
         {
-        case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
-        ptr++;
-        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
-        if (*ptr == CHAR_NULL)
-          {
-          *errorcodeptr = ERR18;
-          goto FAILED;
-          }
-        continue;
-
-
         /* ------------------------------------------------------------ */
         case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
         reset_bracount = TRUE;
@@ -6620,8 +6686,13 @@ for (;; ptr++)
         if (tempptr[1] == CHAR_QUESTION_MARK &&
               (tempptr[2] == CHAR_EQUALS_SIGN ||
                tempptr[2] == CHAR_EXCLAMATION_MARK ||
-               tempptr[2] == CHAR_LESS_THAN_SIGN))
+                 (tempptr[2] == CHAR_LESS_THAN_SIGN &&
+                   (tempptr[3] == CHAR_EQUALS_SIGN ||
+                    tempptr[3] == CHAR_EXCLAMATION_MARK))))
+          {
+          cd->iscondassert = TRUE;
           break;
+          }
 
         /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
         need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
@@ -6698,8 +6769,7 @@ for (;; ptr++)
             ptr++;
             }
           namelen = (int)(ptr - name);
-          if (lengthptr != NULL && (options & PCRE_DUPNAMES) != 0)
-            *lengthptr += IMM2_SIZE;
+          if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
           }
 
         /* Check the terminator */
@@ -6735,6 +6805,7 @@ for (;; ptr++)
             goto FAILED;
             }
           PUT2(code, 2+LINK_SIZE, recno);
+          if (recno > cd->top_backref) cd->top_backref = recno;
           break;
           }
 
@@ -6757,6 +6828,7 @@ for (;; ptr++)
           int offset = i++;
           int count = 1;
           recno = GET2(slot, 0);   /* Number from first found */
+          if (recno > cd->top_backref) cd->top_backref = recno;
           for (; i < cd->names_found; i++)
             {
             slot += cd->name_entry_size;
@@ -7114,11 +7186,11 @@ for (;; ptr++)
 
           if (!is_recurse) cd->namedrefcount++;
 
-          /* If duplicate names are permitted, we have to allow for a named
-          reference to a duplicated name (this cannot be determined until the
-          second pass). This needs an extra 16-bit data item. */
+          /* We have to allow for a named reference to a duplicated name (this
+          cannot be determined until the second pass). This needs an extra
+          16-bit data item. */
 
-          if ((options & PCRE_DUPNAMES) != 0) *lengthptr += IMM2_SIZE;
+          *lengthptr += IMM2_SIZE;
           }
 
         /* In the real compile, search the name table. We check the name
@@ -7475,12 +7547,22 @@ for (;; ptr++)
       goto FAILED;
       }
 
-    /* Assertions used not to be repeatable, but this was changed for Perl
-    compatibility, so all kinds can now be repeated. We copy code into a
+    /* All assertions used not to be repeatable, but this was changed for Perl
+    compatibility. All kinds can now be repeated except for assertions that are
+    conditions (Perl also forbids these to be repeated). We copy code into a
     non-register variable (tempcode) in order to be able to pass its address
-    because some compilers complain otherwise. */
+    because some compilers complain otherwise. At the start of a conditional
+    group whose condition is an assertion, cd->iscondassert is set. We unset it
+    here so as to allow assertions later in the group to be quantified. */
+
+    if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
+        cd->iscondassert)
+      {
+      previous = NULL;
+      cd->iscondassert = FALSE;
+      }
+    else previous = code;
 
-    previous = code;                      /* For handling repetition */
     *code = bravalue;
     tempcode = code;
     tempreqvary = cd->req_varyopt;        /* Save value before bracket */
@@ -7727,7 +7809,7 @@ for (;; ptr++)
         const pcre_uchar *p;
         pcre_uint32 cf;
 
-        save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
+        save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
         terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
           CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
 
@@ -8054,6 +8136,7 @@ int length;
 unsigned int orig_bracount;
 unsigned int max_bracount;
 branch_chain bc;
+size_t save_hwm_offset;
 
 /* If set, call the external function that checks for stack availability. */
 
@@ -8071,6 +8154,8 @@ bc.current_branch = code;
 firstchar = reqchar = 0;
 firstcharflags = reqcharflags = REQ_UNSET;
 
+save_hwm_offset = cd->hwm - cd->start_workspace;
+
 /* Accumulate the length for use in the pre-compile phase. Start with the
 length of the BRA and KET and any extra bytes that are required at the
 beginning. We accumulate in a local variable to save frequent testing of
@@ -8212,7 +8297,7 @@ for (;;)
       int fixed_length;
       *code = OP_END;
       fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,
-        FALSE, cd);
+        FALSE, cd, NULL);
       DPRINTF(("fixed length = %d\n", fixed_length));
       if (fixed_length == -3)
         {
@@ -8273,7 +8358,7 @@ for (;;)
         {
         *code = OP_END;
         adjust_recurse(start_bracket, 1 + LINK_SIZE,
-          (options & PCRE_UTF8) != 0, cd, cd->hwm);
+          (options & PCRE_UTF8) != 0, cd, save_hwm_offset);
         memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
           IN_UCHARS(code - start_bracket));
         *start_bracket = OP_ONCE;
@@ -8497,6 +8582,7 @@ do {
        case OP_RREF:
        case OP_DNRREF:
        case OP_DEF:
+       case OP_FAIL:
        return FALSE;
 
        default:     /* Assertion */
@@ -9081,6 +9167,7 @@ cd->dupnames = FALSE;
 cd->namedrefcount = 0;
 cd->start_code = cworkspace;
 cd->hwm = cworkspace;
+cd->iscondassert = FALSE;
 cd->start_workspace = cworkspace;
 cd->workspace_size = COMPILE_WORK_SIZE;
 cd->named_groups = named_groups;
@@ -9118,13 +9205,6 @@ if (length > MAX_PATTERN_SIZE)
   goto PCRE_EARLY_ERROR_RETURN;
   }
 
-/* If there are groups with duplicate names and there are also references by
-name, we must allow for the possibility of named references to duplicated
-groups. These require an extra data item each. */
-
-if (cd->dupnames && cd->namedrefcount > 0)
-  length += cd->namedrefcount * IMM2_SIZE * sizeof(pcre_uchar);
-
 /* Compute the size of the data block for storing the compiled pattern. Integer
 overflow should no longer be possible because nowadays we limit the maximum
 value of cd->names_found and cd->name_entry_size. */
@@ -9183,6 +9263,7 @@ cd->name_table = (pcre_uchar *)re + re->name_table_offset;
 codestart = cd->name_table + re->name_entry_size * re->name_count;
 cd->start_code = codestart;
 cd->hwm = (pcre_uchar *)(cd->start_workspace);
+cd->iscondassert = FALSE;
 cd->req_varyopt = 0;
 cd->had_accept = FALSE;
 cd->had_pruneorskip = FALSE;
@@ -9319,7 +9400,7 @@ if (cd->check_lookbehind)
       int end_op = *be;
       *be = OP_END;
       fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
-        cd);
+        cd, NULL);
       *be = end_op;
       DPRINTF(("fixed length = %d\n", fixed_length));
       if (fixed_length < 0)
index 654eb9e2762144188068cecf9a065733b2563baa..c021fe1a4c98f294a4b7e314077d0b587714ea61 100644 (file)
@@ -1136,93 +1136,81 @@ for (;;)
     printf("\n");
 #endif
 
-    if (offset < md->offset_max)
-      {
-      matched_once = FALSE;
-      code_offset = (int)(ecode - md->start_code);
-
-      save_offset1 = md->offset_vector[offset];
-      save_offset2 = md->offset_vector[offset+1];
-      save_offset3 = md->offset_vector[md->offset_end - number];
-      save_capture_last = md->capture_last;
+    if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
 
-      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+    matched_once = FALSE;
+    code_offset = (int)(ecode - md->start_code);
 
-      /* Each time round the loop, save the current subject position for use
-      when the group matches. For MATCH_MATCH, the group has matched, so we
-      restart it with a new subject starting position, remembering that we had
-      at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
-      usual. If we haven't matched any alternatives in any iteration, check to
-      see if a previous iteration matched. If so, the group has matched;
-      continue from afterwards. Otherwise it has failed; restore the previous
-      capture values before returning NOMATCH. */
+    save_offset1 = md->offset_vector[offset];
+    save_offset2 = md->offset_vector[offset+1];
+    save_offset3 = md->offset_vector[md->offset_end - number];
+    save_capture_last = md->capture_last;
 
-      for (;;)
-        {
-        md->offset_vector[md->offset_end - number] =
-          (int)(eptr - md->start_subject);
-        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
-        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
-          eptrb, RM63);
-        if (rrc == MATCH_KETRPOS)
-          {
-          offset_top = md->end_offset_top;
-          ecode = md->start_code + code_offset;
-          save_capture_last = md->capture_last;
-          matched_once = TRUE;
-          mstart = md->start_match_ptr;    /* In case \K changed it */
-          if (eptr == md->end_match_ptr)   /* Matched an empty string */
-            {
-            do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
-            break;
-            }
-          eptr = md->end_match_ptr;
-          continue;
-          }
+    DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 
-        /* See comment in the code for capturing groups above about handling
-        THEN. */
+    /* Each time round the loop, save the current subject position for use
+    when the group matches. For MATCH_MATCH, the group has matched, so we
+    restart it with a new subject starting position, remembering that we had
+    at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
+    usual. If we haven't matched any alternatives in any iteration, check to
+    see if a previous iteration matched. If so, the group has matched;
+    continue from afterwards. Otherwise it has failed; restore the previous
+    capture values before returning NOMATCH. */
 
-        if (rrc == MATCH_THEN)
+    for (;;)
+      {
+      md->offset_vector[md->offset_end - number] =
+        (int)(eptr - md->start_subject);
+      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
+      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
+        eptrb, RM63);
+      if (rrc == MATCH_KETRPOS)
+        {
+        offset_top = md->end_offset_top;
+        ecode = md->start_code + code_offset;
+        save_capture_last = md->capture_last;
+        matched_once = TRUE;
+        mstart = md->start_match_ptr;    /* In case \K changed it */
+        if (eptr == md->end_match_ptr)   /* Matched an empty string */
           {
-          next = ecode + GET(ecode,1);
-          if (md->start_match_ptr < next &&
-              (*ecode == OP_ALT || *next == OP_ALT))
-            rrc = MATCH_NOMATCH;
+          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+          break;
           }
-
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        md->capture_last = save_capture_last;
-        ecode += GET(ecode, 1);
-        if (*ecode != OP_ALT) break;
+        eptr = md->end_match_ptr;
+        continue;
         }
 
-      if (!matched_once)
-        {
-        md->offset_vector[offset] = save_offset1;
-        md->offset_vector[offset+1] = save_offset2;
-        md->offset_vector[md->offset_end - number] = save_offset3;
-        }
+      /* See comment in the code for capturing groups above about handling
+      THEN. */
 
-      if (allow_zero || matched_once)
+      if (rrc == MATCH_THEN)
         {
-        ecode += 1 + LINK_SIZE;
-        break;
+        next = ecode + GET(ecode,1);
+        if (md->start_match_ptr < next &&
+            (*ecode == OP_ALT || *next == OP_ALT))
+          rrc = MATCH_NOMATCH;
         }
 
-      RRETURN(MATCH_NOMATCH);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      md->capture_last = save_capture_last;
+      ecode += GET(ecode, 1);
+      if (*ecode != OP_ALT) break;
       }
 
-    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
-    as a non-capturing bracket. */
-
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+    if (!matched_once)
+      {
+      md->offset_vector[offset] = save_offset1;
+      md->offset_vector[offset+1] = save_offset2;
+      md->offset_vector[md->offset_end - number] = save_offset3;
+      }
 
-    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
+    if (allow_zero || matched_once)
+      {
+      ecode += 1 + LINK_SIZE;
+      break;
+      }
 
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+    RRETURN(MATCH_NOMATCH);
 
     /* Non-capturing possessive bracket with unlimited repeat. We come here
     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
@@ -1388,6 +1376,7 @@ for (;;)
       break;
 
       case OP_DEF:     /* DEFINE - always false */
+      case OP_FAIL:    /* From optimized (?!) condition */
       break;
 
       /* The condition is an assertion. Call match() to evaluate it - setting
@@ -1404,8 +1393,11 @@ for (;;)
         condition = TRUE;
 
         /* Advance ecode past the assertion to the start of the first branch,
-        but adjust it so that the general choosing code below works. */
+        but adjust it so that the general choosing code below works. If the
+        assertion has a quantifier that allows zero repeats we must skip over
+        the BRAZERO. This is a lunatic thing to do, but somebody did! */
 
+        if (*ecode == OP_BRAZERO) ecode++;
         ecode += GET(ecode, 1);
         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
@@ -1474,7 +1466,18 @@ for (;;)
       md->offset_vector[offset] =
         md->offset_vector[md->offset_end - number];
       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
-      if (offset_top <= offset) offset_top = offset + 2;
+
+      /* If this group is at or above the current highwater mark, ensure that
+      any groups between the current high water mark and this group are marked
+      unset and then update the high water mark. */
+
+      if (offset >= offset_top)
+        {
+        register int *iptr = md->offset_vector + offset_top;
+        register int *iend = md->offset_vector + offset;
+        while (iptr < iend) *iptr++ = -1;
+        offset_top = offset + 2;
+        }
       }
     ecode += 1 + IMM2_SIZE;
     break;
@@ -1826,7 +1829,11 @@ for (;;)
         are defined in a range that can be tested for. */
 
         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
+          {
+          if (new_recursive.offset_save != stacksave)
+            (PUBL(free))(new_recursive.offset_save);
           RRETURN(MATCH_NOMATCH);
+          }
 
         /* Any return code other than NOMATCH is an error. */
 
@@ -3476,7 +3483,7 @@ for (;;)
           if (possessive) continue;    /* No backtracking */
           for(;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 #ifdef SUPPORT_UCP
@@ -3897,7 +3904,7 @@ for (;;)
           if (possessive) continue;    /* No backtracking */
           for(;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             eptr--;
@@ -4032,7 +4039,7 @@ for (;;)
           if (possessive) continue;    /* No backtracking */
           for(;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             eptr--;
@@ -5603,7 +5610,7 @@ for (;;)
         if (possessive) continue;    /* No backtracking */
         for(;;)
           {
-          if (eptr == pp) goto TAIL_RECURSE;
+          if (eptr <= pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           eptr--;
@@ -5645,12 +5652,17 @@ for (;;)
 
         if (possessive) continue;    /* No backtracking */
 
+        /* We use <= pp rather than == pp to detect the start of the run while
+        backtracking because the use of \C in UTF mode can cause BACKCHAR to
+        move back past pp. This is just palliative; the use of \C in UTF mode
+        is fraught with danger. */
+
         for(;;)
           {
           int lgb, rgb;
           PCRE_PUCHAR fptr;
 
-          if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
+          if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 
@@ -5668,7 +5680,7 @@ for (;;)
 
           for (;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
+            if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
             fptr = eptr - 1;
             if (!utf) c = *fptr; else
               {
@@ -5918,7 +5930,7 @@ for (;;)
         if (possessive) continue;    /* No backtracking */
         for(;;)
           {
-          if (eptr == pp) goto TAIL_RECURSE;
+          if (eptr <= pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           eptr--;
index 19d18a830a415fcba02740d6d36c9f7179a8d41a..4c4817d797bf31e67cb0b90d1b9572454ab44a8f 100644 (file)
@@ -2450,6 +2450,7 @@ typedef struct compile_data {
   BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
   BOOL check_lookbehind;            /* Lookbehinds need later checking */
   BOOL dupnames;                    /* Duplicate names exist */
+  BOOL iscondassert;                /* Next assert is a condition */
   int  nltype;                      /* Newline type */
   int  nllen;                       /* Newline string length */
   pcre_uchar nl[4];                 /* Newline string when fixed length */
@@ -2463,6 +2464,13 @@ typedef struct branch_chain {
   pcre_uchar *current_branch;
 } branch_chain;
 
+/* Structure for mutual recursion detection. */
+
+typedef struct recurse_check {
+  struct recurse_check *prev;
+  const pcre_uchar *group;
+} recurse_check;
+
 /* Structure for items in a linked list that represents an explicit recursive
 call within the pattern; used by pcre_exec(). */
 
diff --git a/ext/pcre/pcrelib/pcre_jit_compile.c b/ext/pcre/pcrelib/pcre_jit_compile.c
new file mode 100644 (file)
index 0000000..debdf6e
--- /dev/null
@@ -0,0 +1,10690 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2013 University of Cambridge
+
+  The machine code generator part (this module) was written by Zoltan Herczeg
+                      Copyright (c) 2010-2013
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre_internal.h"
+
+#if defined SUPPORT_JIT
+
+/* All-in-one: Since we use the JIT compiler only from here,
+we just include it. This way we don't need to touch the build
+system files. */
+
+#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
+#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
+#define SLJIT_CONFIG_AUTO 1
+#define SLJIT_CONFIG_STATIC 1
+#define SLJIT_VERBOSE 0
+#define SLJIT_DEBUG 0
+
+#include "sljit/sljitLir.c"
+
+#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
+#error Unsupported architecture
+#endif
+
+/* Defines for debugging purposes. */
+
+/* 1 - Use unoptimized capturing brackets.
+   2 - Enable capture_last_ptr (includes option 1). */
+/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
+
+/* 1 - Always have a control head. */
+/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
+
+/* Allocate memory for the regex stack on the real machine stack.
+Fast, but limited size. */
+#define MACHINE_STACK_SIZE 32768
+
+/* Growth rate for stack allocated by the OS. Should be a multiple
+of the page size. */
+#define STACK_GROWTH_RATE 8192
+
+/* Enable to check that the allocation could destroy temporaries. */
+#if defined SLJIT_DEBUG && SLJIT_DEBUG
+#define DESTROY_REGISTERS 1
+#endif
+
+/*
+Short summary of the backtracking mechanism employed by the JIT code generator:
+
+The code generator follows the recursive nature of Perl-compatible regular
+expressions. The basic blocks of regular expressions are condition checkers
+which execute different commands depending on the result of the condition check.
+The relationship between the operators can be horizontal (concatenation) and
+vertical (sub-expression) (See struct backtrack_common for more details).
+
+  'ab' - 'a' and 'b' regexps are concatenated
+  'a+' - 'a' is the sub-expression of the '+' operator
+
+The condition checkers are boolean (true/false) checkers. Machine code is generated
+for the checker itself and for the actions that depend on its result.
+The 'true' case is called the matching path (the expected path), and the other is called
+the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
+branches on the matching path.
+
+ Greedy star operator (*) :
+   Matching path: match happens.
+   Backtrack path: match failed.
+ Non-greedy star operator (*?) :
+   Matching path: no need to perform a match.
+   Backtrack path: match is required.
+
+The following example shows the code generated for a capturing bracket
+with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
+consider the following regular expression:
+
+   A(B|C)D
+
+The generated code will be the following:
+
+ A matching path
+ '(' matching path (pushing arguments to the stack)
+ B matching path
+ ')' matching path (pushing arguments to the stack)
+ D matching path
+ return with successful match
+
+ D backtrack path
+ ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
+ B backtrack path
+ C expected path
+ jump to D matching path
+ C backtrack path
+ A backtrack path
+
+ Notice that the backtrack code paths appear in the opposite order of the
+ matching code paths. In this way the topmost value on the stack always
+ belongs to the current backtrack code path. The backtrack path must check
+ whether there is a next alternative. If so, it eventually needs to jump
+ back to the matching path. Otherwise it needs to clear out its own stack
+ frame and continue execution on the backtrack code paths.
+*/
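+
+/* A minimal illustration (invented helper names, not generated code): written
+as C control flow, the matching path falls straight through, and a branch is
+taken only when a match attempt fails. */
+#if 0
+static int match_sketch(void)
+{
+if (!match_a()) goto backtrack_a;  /* Fall through on success. */
+if (!match_b()) goto backtrack_b;  /* Matching paths run forward... */
+return 1;                          /* Successful match. */
+backtrack_b:                       /* ...backtrack paths in reverse order. */
+undo_b();
+backtrack_a:
+undo_a();
+return 0;                          /* No match. */
+}
+#endif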
+
+/*
+Saved stack frames:
+
+Atomic blocks and asserts require reloading the values of private data
+when the backtrack mechanism is performed. Because of OP_RECURSE, the data
+are not necessarily known at compile time, so we need a dynamic restore
+mechanism.
+
+The stack frames are stored in a chain list, and have the following format:
+([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
+
+Thus we can restore the private data to a particular point in the stack.
+*/
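+
+/* A minimal sketch, for illustration only, of walking this chain in C. It
+assumes 'head' points at the [ previous head ] word of the newest frame; the
+real restore is done by generated code (compare do_search_mark below, which
+walks the control verb chain similarly). */
+#if 0
+static void walk_frames_sketch(sljit_sw *head)
+{
+while (head != NULL)
+  {
+  /* Restore the ([ offset ][ start ][ end ]) items of this frame here. */
+  head = (sljit_sw *)*head;  /* Follow the [ previous head ] link. */
+  }
+}
+#endif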
+
+typedef struct jit_arguments {
+  /* Pointers first. */
+  struct sljit_stack *stack;
+  const pcre_uchar *str;
+  const pcre_uchar *begin;
+  const pcre_uchar *end;
+  int *offsets;
+  pcre_uchar *uchar_ptr;
+  pcre_uchar *mark_ptr;
+  void *callout_data;
+  /* Everything else after. */
+  pcre_uint32 limit_match;
+  int real_offset_count;
+  int offset_count;
+  pcre_uint8 notbol;
+  pcre_uint8 noteol;
+  pcre_uint8 notempty;
+  pcre_uint8 notempty_atstart;
+} jit_arguments;
+
+typedef struct executable_functions {
+  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
+  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
+  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
+  PUBL(jit_callback) callback;
+  void *userdata;
+  pcre_uint32 top_bracket;
+  pcre_uint32 limit_match;
+} executable_functions;
+
+typedef struct jump_list {
+  struct sljit_jump *jump;
+  struct jump_list *next;
+} jump_list;
+
+typedef struct stub_list {
+  struct sljit_jump *start;
+  struct sljit_label *quit;
+  struct stub_list *next;
+} stub_list;
+
+typedef struct label_addr_list {
+  struct sljit_label *label;
+  sljit_uw *update_addr;
+  struct label_addr_list *next;
+} label_addr_list;
+
+enum frame_types {
+  no_frame = -1,
+  no_stack = -2
+};
+
+enum control_types {
+  type_mark = 0,
+  type_then_trap = 1
+};
+
+typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
+
+/* The following structure is the key data type for the recursive
+code generator. It is allocated by compile_matchingpath, and contains
+the arguments for compile_backtrackingpath. Must be the first member
+of its descendants. */
+typedef struct backtrack_common {
+  /* Concatenation stack. */
+  struct backtrack_common *prev;
+  jump_list *nextbacktracks;
+  /* Internal stack (for component operators). */
+  struct backtrack_common *top;
+  jump_list *topbacktracks;
+  /* Opcode pointer. */
+  pcre_uchar *cc;
+} backtrack_common;
+
+typedef struct assert_backtrack {
+  backtrack_common common;
+  jump_list *condfailed;
+  /* Less than 0 if a frame is not needed. */
+  int framesize;
+  /* Points to our private memory word on the stack. */
+  int private_data_ptr;
+  /* For iterators. */
+  struct sljit_label *matchingpath;
+} assert_backtrack;
+
+typedef struct bracket_backtrack {
+  backtrack_common common;
+  /* Where to continue if an alternative is successfully matched. */
+  struct sljit_label *alternative_matchingpath;
+  /* For rmin and rmax iterators. */
+  struct sljit_label *recursive_matchingpath;
+  /* For greedy ? operator. */
+  struct sljit_label *zero_matchingpath;
+  /* Contains the branches of a failed condition. */
+  union {
+    /* Both for OP_COND, OP_SCOND. */
+    jump_list *condfailed;
+    assert_backtrack *assert;
+    /* For OP_ONCE. Less than 0 if not needed. */
+    int framesize;
+  } u;
+  /* Points to our private memory word on the stack. */
+  int private_data_ptr;
+} bracket_backtrack;
+
+typedef struct bracketpos_backtrack {
+  backtrack_common common;
+  /* Points to our private memory word on the stack. */
+  int private_data_ptr;
+  /* Reverting stack is needed. */
+  int framesize;
+  /* Allocated stack size. */
+  int stacksize;
+} bracketpos_backtrack;
+
+typedef struct braminzero_backtrack {
+  backtrack_common common;
+  struct sljit_label *matchingpath;
+} braminzero_backtrack;
+
+typedef struct iterator_backtrack {
+  backtrack_common common;
+  /* Next iteration. */
+  struct sljit_label *matchingpath;
+} iterator_backtrack;
+
+typedef struct recurse_entry {
+  struct recurse_entry *next;
+  /* Contains the function entry. */
+  struct sljit_label *entry;
+  /* Collects the calls until the function is created. */
+  jump_list *calls;
+  /* Points to the starting opcode. */
+  sljit_sw start;
+} recurse_entry;
+
+typedef struct recurse_backtrack {
+  backtrack_common common;
+  BOOL inlined_pattern;
+} recurse_backtrack;
+
+#define OP_THEN_TRAP OP_TABLE_LENGTH
+
+typedef struct then_trap_backtrack {
+  backtrack_common common;
+  /* If then_trap is not NULL, this structure contains the real
+  then_trap for the backtracking path. */
+  struct then_trap_backtrack *then_trap;
+  /* Points to the starting opcode. */
+  sljit_sw start;
+  /* Exit point for the then opcodes of this alternative. */
+  jump_list *quit;
+  /* Frame size of the current alternative. */
+  int framesize;
+} then_trap_backtrack;
+
+#define MAX_RANGE_SIZE 4
+
+typedef struct compiler_common {
+  /* The sljit generic compiler. */
+  struct sljit_compiler *compiler;
+  /* First byte code. */
+  pcre_uchar *start;
+  /* Maps private data offset to each opcode. */
+  sljit_si *private_data_ptrs;
+  /* Chain list of read-only data ptrs. */
+  void *read_only_data_head;
+  /* Tells whether the capturing bracket is optimized. */
+  pcre_uint8 *optimized_cbracket;
+  /* Tells whether the starting offset is a target of THEN. */
+  pcre_uint8 *then_offsets;
+  /* Current position where a THEN must jump. */
+  then_trap_backtrack *then_trap;
+  /* Starting offset of private data for capturing brackets. */
+  int cbra_ptr;
+  /* Output vector starting point. Must be divisible by 2. */
+  int ovector_start;
+  /* Last known position of the requested byte. */
+  int req_char_ptr;
+  /* Head of the last recursion. */
+  int recursive_head_ptr;
+  /* First inspected character for partial matching. */
+  int start_used_ptr;
+  /* Starting pointer for partial soft matches. */
+  int hit_start;
+  /* End pointer of the first line. */
+  int first_line_end;
+  /* Points to the marked string. */
+  int mark_ptr;
+  /* Recursive control verb management chain. */
+  int control_head_ptr;
+  /* Points to the last matched capture block index. */
+  int capture_last_ptr;
+  /* Points to the starting position of the current match. */
+  int start_ptr;
+
+  /* Flipped and lower case tables. */
+  const pcre_uint8 *fcc;
+  sljit_sw lcc;
+  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
+  int mode;
+  /* TRUE, when the minimum match length is 0, i.e. an empty match is possible. */
+  BOOL might_be_empty;
+  /* \K is found in the pattern. */
+  BOOL has_set_som;
+  /* (*SKIP:arg) is found in the pattern. */
+  BOOL has_skip_arg;
+  /* (*THEN) is found in the pattern. */
+  BOOL has_then;
+  /* The start position must be known at all times. */
+  BOOL needs_start_ptr;
+  /* Currently in recurse or negative assert. */
+  BOOL local_exit;
+  /* Currently in a positive assert. */
+  BOOL positive_assert;
+  /* Newline control. */
+  int nltype;
+  pcre_uint32 nlmax;
+  pcre_uint32 nlmin;
+  int newline;
+  int bsr_nltype;
+  pcre_uint32 bsr_nlmax;
+  pcre_uint32 bsr_nlmin;
+  /* Dollar endonly. */
+  int endonly;
+  /* Tables. */
+  sljit_sw ctypes;
+  /* Named capturing brackets. */
+  pcre_uchar *name_table;
+  sljit_sw name_count;
+  sljit_sw name_entry_size;
+
+  /* Labels and jump lists. */
+  struct sljit_label *partialmatchlabel;
+  struct sljit_label *quit_label;
+  struct sljit_label *forced_quit_label;
+  struct sljit_label *accept_label;
+  struct sljit_label *ff_newline_shortcut;
+  stub_list *stubs;
+  label_addr_list *label_addrs;
+  recurse_entry *entries;
+  recurse_entry *currententry;
+  jump_list *partialmatch;
+  jump_list *quit;
+  jump_list *positive_assert_quit;
+  jump_list *forced_quit;
+  jump_list *accept;
+  jump_list *calllimit;
+  jump_list *stackalloc;
+  jump_list *revertframes;
+  jump_list *wordboundary;
+  jump_list *anynewline;
+  jump_list *hspace;
+  jump_list *vspace;
+  jump_list *casefulcmp;
+  jump_list *caselesscmp;
+  jump_list *reset_match;
+  BOOL jscript_compat;
+#ifdef SUPPORT_UTF
+  BOOL utf;
+#ifdef SUPPORT_UCP
+  BOOL use_ucp;
+#endif
+#ifdef COMPILE_PCRE8
+  jump_list *utfreadchar;
+  jump_list *utfreadchar16;
+  jump_list *utfreadtype8;
+#endif
+#endif /* SUPPORT_UTF */
+#ifdef SUPPORT_UCP
+  jump_list *getucd;
+#endif
+} compiler_common;
+
+/* For byte_sequence_compare. */
+
+typedef struct compare_context {
+  int length;
+  int sourcereg;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  int ucharptr;
+  union {
+    sljit_si asint;
+    sljit_uh asushort;
+#if defined COMPILE_PCRE8
+    sljit_ub asbyte;
+    sljit_ub asuchars[4];
+#elif defined COMPILE_PCRE16
+    sljit_uh asuchars[2];
+#elif defined COMPILE_PCRE32
+    sljit_ui asuchars[1];
+#endif
+  } c;
+  union {
+    sljit_si asint;
+    sljit_uh asushort;
+#if defined COMPILE_PCRE8
+    sljit_ub asbyte;
+    sljit_ub asuchars[4];
+#elif defined COMPILE_PCRE16
+    sljit_uh asuchars[2];
+#elif defined COMPILE_PCRE32
+    sljit_ui asuchars[1];
+#endif
+  } oc;
+#endif
+} compare_context;
+
+/* Undefine sljit macros. */
+#undef CMP
+
+/* Used for accessing the elements of the stack. */
+#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
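+/* For example, STACK(0) expands to -(int)sizeof(sljit_sw) and STACK(1) to
+-2 * (int)sizeof(sljit_sw): element offsets grow downwards from STACK_TOP. */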
+
+#define TMP1          SLJIT_R0
+#define TMP2          SLJIT_R2
+#define TMP3          SLJIT_R3
+#define STR_PTR       SLJIT_S0
+#define STR_END       SLJIT_S1
+#define STACK_TOP     SLJIT_R1
+#define STACK_LIMIT   SLJIT_S2
+#define COUNT_MATCH   SLJIT_S3
+#define ARGUMENTS     SLJIT_S4
+#define RETURN_ADDR   SLJIT_R4
+
+/* Local space layout. */
+/* These two locals can be used by the current opcode. */
+#define LOCALS0          (0 * sizeof(sljit_sw))
+#define LOCALS1          (1 * sizeof(sljit_sw))
+/* Two local variables for possessive quantifiers (char1 cannot use them). */
+#define POSSESSIVE0      (2 * sizeof(sljit_sw))
+#define POSSESSIVE1      (3 * sizeof(sljit_sw))
+/* Max limit of recursions. */
+#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
+/* The output vector is stored on the stack, and contains pointers
+to characters. The vector data is divided into two groups: the first
+group contains the start / end character pointers, and the second holds
+the start pointers for capturing groups whose end has not yet been reached.
+#define OVECTOR_START    (common->ovector_start)
+#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
+#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
+#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
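+/* For example, OVECTOR(0) and OVECTOR(1) address the start and end of the
+whole match (capturing group 0), OVECTOR(2) and OVECTOR(3) those of the
+first capturing group, and so on. */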
+
+#if defined COMPILE_PCRE8
+#define MOV_UCHAR  SLJIT_MOV_UB
+#define MOVU_UCHAR SLJIT_MOVU_UB
+#elif defined COMPILE_PCRE16
+#define MOV_UCHAR  SLJIT_MOV_UH
+#define MOVU_UCHAR SLJIT_MOVU_UH
+#elif defined COMPILE_PCRE32
+#define MOV_UCHAR  SLJIT_MOV_UI
+#define MOVU_UCHAR SLJIT_MOVU_UI
+#else
+#error Unsupported compiling mode
+#endif
+
+/* Shortcuts. */
+#define DEFINE_COMPILER \
+  struct sljit_compiler *compiler = common->compiler
+#define OP1(op, dst, dstw, src, srcw) \
+  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
+#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
+  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
+#define LABEL() \
+  sljit_emit_label(compiler)
+#define JUMP(type) \
+  sljit_emit_jump(compiler, (type))
+#define JUMPTO(type, label) \
+  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
+#define JUMPHERE(jump) \
+  sljit_set_label((jump), sljit_emit_label(compiler))
+#define SET_LABEL(jump, label) \
+  sljit_set_label((jump), (label))
+#define CMP(type, src1, src1w, src2, src2w) \
+  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
+#define CMPTO(type, src1, src1w, src2, src2w, label) \
+  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
+#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
+  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
+#define GET_LOCAL_BASE(dst, dstw, offset) \
+  sljit_get_local_base(compiler, (dst), (dstw), (offset))
+
+#define READ_CHAR_MAX 0x7fffffff
+
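+/* Returns the address just past the matching OP_KET of a bracket, skipping
+any OP_ALT alternatives; e.g. for the byte code of (a|b) it returns the
+address right after the closing bracket. */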
+static pcre_uchar *bracketend(pcre_uchar *cc)
+{
+SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
+do cc += GET(cc, 1); while (*cc == OP_ALT);
+SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
+cc += 1 + LINK_SIZE;
+return cc;
+}
+
+static int no_alternatives(pcre_uchar *cc)
+{
+int count = 0;
+SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
+do
+  {
+  cc += GET(cc, 1);
+  count++;
+  }
+while (*cc == OP_ALT);
+SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
+return count;
+}
+
+static int ones_in_half_byte[16] = {
+  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
+  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
+};
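+
+/* For illustration only: the table above gives the number of set bits in a
+4 bit value, so the population count of a byte can be computed with two
+lookups. */
+#if 0
+static int ones_in_byte(unsigned int byte)
+{
+return ones_in_half_byte[byte & 0xf] + ones_in_half_byte[(byte >> 4) & 0xf];
+}
+#endif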
+
+/* Functions which might need modification when a new opcode is supported:
+ next_opcode
+ check_opcode_types
+ set_private_data_ptrs
+ get_framesize
+ init_frame
+ get_private_data_copy_length
+ copy_private_data
+ compile_matchingpath
+ compile_backtrackingpath
+*/
+
+static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
+{
+SLJIT_UNUSED_ARG(common);
+switch(*cc)
+  {
+  case OP_SOD:
+  case OP_SOM:
+  case OP_SET_SOM:
+  case OP_NOT_WORD_BOUNDARY:
+  case OP_WORD_BOUNDARY:
+  case OP_NOT_DIGIT:
+  case OP_DIGIT:
+  case OP_NOT_WHITESPACE:
+  case OP_WHITESPACE:
+  case OP_NOT_WORDCHAR:
+  case OP_WORDCHAR:
+  case OP_ANY:
+  case OP_ALLANY:
+  case OP_NOTPROP:
+  case OP_PROP:
+  case OP_ANYNL:
+  case OP_NOT_HSPACE:
+  case OP_HSPACE:
+  case OP_NOT_VSPACE:
+  case OP_VSPACE:
+  case OP_EXTUNI:
+  case OP_EODN:
+  case OP_EOD:
+  case OP_CIRC:
+  case OP_CIRCM:
+  case OP_DOLL:
+  case OP_DOLLM:
+  case OP_CRSTAR:
+  case OP_CRMINSTAR:
+  case OP_CRPLUS:
+  case OP_CRMINPLUS:
+  case OP_CRQUERY:
+  case OP_CRMINQUERY:
+  case OP_CRRANGE:
+  case OP_CRMINRANGE:
+  case OP_CRPOSSTAR:
+  case OP_CRPOSPLUS:
+  case OP_CRPOSQUERY:
+  case OP_CRPOSRANGE:
+  case OP_CLASS:
+  case OP_NCLASS:
+  case OP_REF:
+  case OP_REFI:
+  case OP_DNREF:
+  case OP_DNREFI:
+  case OP_RECURSE:
+  case OP_CALLOUT:
+  case OP_ALT:
+  case OP_KET:
+  case OP_KETRMAX:
+  case OP_KETRMIN:
+  case OP_KETRPOS:
+  case OP_REVERSE:
+  case OP_ASSERT:
+  case OP_ASSERT_NOT:
+  case OP_ASSERTBACK:
+  case OP_ASSERTBACK_NOT:
+  case OP_ONCE:
+  case OP_ONCE_NC:
+  case OP_BRA:
+  case OP_BRAPOS:
+  case OP_CBRA:
+  case OP_CBRAPOS:
+  case OP_COND:
+  case OP_SBRA:
+  case OP_SBRAPOS:
+  case OP_SCBRA:
+  case OP_SCBRAPOS:
+  case OP_SCOND:
+  case OP_CREF:
+  case OP_DNCREF:
+  case OP_RREF:
+  case OP_DNRREF:
+  case OP_DEF:
+  case OP_BRAZERO:
+  case OP_BRAMINZERO:
+  case OP_BRAPOSZERO:
+  case OP_PRUNE:
+  case OP_SKIP:
+  case OP_THEN:
+  case OP_COMMIT:
+  case OP_FAIL:
+  case OP_ACCEPT:
+  case OP_ASSERT_ACCEPT:
+  case OP_CLOSE:
+  case OP_SKIPZERO:
+  return cc + PRIV(OP_lengths)[*cc];
+
+  case OP_CHAR:
+  case OP_CHARI:
+  case OP_NOT:
+  case OP_NOTI:
+  case OP_STAR:
+  case OP_MINSTAR:
+  case OP_PLUS:
+  case OP_MINPLUS:
+  case OP_QUERY:
+  case OP_MINQUERY:
+  case OP_UPTO:
+  case OP_MINUPTO:
+  case OP_EXACT:
+  case OP_POSSTAR:
+  case OP_POSPLUS:
+  case OP_POSQUERY:
+  case OP_POSUPTO:
+  case OP_STARI:
+  case OP_MINSTARI:
+  case OP_PLUSI:
+  case OP_MINPLUSI:
+  case OP_QUERYI:
+  case OP_MINQUERYI:
+  case OP_UPTOI:
+  case OP_MINUPTOI:
+  case OP_EXACTI:
+  case OP_POSSTARI:
+  case OP_POSPLUSI:
+  case OP_POSQUERYI:
+  case OP_POSUPTOI:
+  case OP_NOTSTAR:
+  case OP_NOTMINSTAR:
+  case OP_NOTPLUS:
+  case OP_NOTMINPLUS:
+  case OP_NOTQUERY:
+  case OP_NOTMINQUERY:
+  case OP_NOTUPTO:
+  case OP_NOTMINUPTO:
+  case OP_NOTEXACT:
+  case OP_NOTPOSSTAR:
+  case OP_NOTPOSPLUS:
+  case OP_NOTPOSQUERY:
+  case OP_NOTPOSUPTO:
+  case OP_NOTSTARI:
+  case OP_NOTMINSTARI:
+  case OP_NOTPLUSI:
+  case OP_NOTMINPLUSI:
+  case OP_NOTQUERYI:
+  case OP_NOTMINQUERYI:
+  case OP_NOTUPTOI:
+  case OP_NOTMINUPTOI:
+  case OP_NOTEXACTI:
+  case OP_NOTPOSSTARI:
+  case OP_NOTPOSPLUSI:
+  case OP_NOTPOSQUERYI:
+  case OP_NOTPOSUPTOI:
+  cc += PRIV(OP_lengths)[*cc];
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+  return cc;
+
+  /* Special cases. */
+  case OP_TYPESTAR:
+  case OP_TYPEMINSTAR:
+  case OP_TYPEPLUS:
+  case OP_TYPEMINPLUS:
+  case OP_TYPEQUERY:
+  case OP_TYPEMINQUERY:
+  case OP_TYPEUPTO:
+  case OP_TYPEMINUPTO:
+  case OP_TYPEEXACT:
+  case OP_TYPEPOSSTAR:
+  case OP_TYPEPOSPLUS:
+  case OP_TYPEPOSQUERY:
+  case OP_TYPEPOSUPTO:
+  return cc + PRIV(OP_lengths)[*cc] - 1;
+
+  case OP_ANYBYTE:
+#ifdef SUPPORT_UTF
+  if (common->utf) return NULL;
+#endif
+  return cc + 1;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+  case OP_XCLASS:
+  return cc + GET(cc, 1);
+#endif
+
+  case OP_MARK:
+  case OP_PRUNE_ARG:
+  case OP_SKIP_ARG:
+  case OP_THEN_ARG:
+  return cc + 1 + 2 + cc[1];
+
+  default:
+  /* All opcodes are supported now! */
+  SLJIT_ASSERT_STOP();
+  return NULL;
+  }
+}
+
+static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
+{
+int count;
+pcre_uchar *slot;
+
+/* Calculates important variables (like the stack size) and checks whether all opcodes are supported. */
+while (cc < ccend)
+  {
+  switch(*cc)
+    {
+    case OP_SET_SOM:
+    common->has_set_som = TRUE;
+    common->might_be_empty = TRUE;
+    cc += 1;
+    break;
+
+    case OP_REF:
+    case OP_REFI:
+    common->optimized_cbracket[GET2(cc, 1)] = 0;
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_CBRAPOS:
+    case OP_SCBRAPOS:
+    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_COND:
+    case OP_SCOND:
+    /* Only AUTO_CALLOUT can insert this opcode. We do
+       not intend to support this case. */
+    if (cc[1 + LINK_SIZE] == OP_CALLOUT)
+      return FALSE;
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CREF:
+    common->optimized_cbracket[GET2(cc, 1)] = 0;
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_DNREF:
+    case OP_DNREFI:
+    case OP_DNCREF:
+    count = GET2(cc, 1 + IMM2_SIZE);
+    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
+    while (count-- > 0)
+      {
+      common->optimized_cbracket[GET2(slot, 0)] = 0;
+      slot += common->name_entry_size;
+      }
+    cc += 1 + 2 * IMM2_SIZE;
+    break;
+
+    case OP_RECURSE:
+    /* Set its value only once. */
+    if (common->recursive_head_ptr == 0)
+      {
+      common->recursive_head_ptr = common->ovector_start;
+      common->ovector_start += sizeof(sljit_sw);
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CALLOUT:
+    if (common->capture_last_ptr == 0)
+      {
+      common->capture_last_ptr = common->ovector_start;
+      common->ovector_start += sizeof(sljit_sw);
+      }
+    cc += 2 + 2 * LINK_SIZE;
+    break;
+
+    case OP_THEN_ARG:
+    common->has_then = TRUE;
+    common->control_head_ptr = 1;
+    /* Fall through. */
+
+    case OP_PRUNE_ARG:
+    common->needs_start_ptr = TRUE;
+    /* Fall through. */
+
+    case OP_MARK:
+    if (common->mark_ptr == 0)
+      {
+      common->mark_ptr = common->ovector_start;
+      common->ovector_start += sizeof(sljit_sw);
+      }
+    cc += 1 + 2 + cc[1];
+    break;
+
+    case OP_THEN:
+    common->has_then = TRUE;
+    common->control_head_ptr = 1;
+    /* Fall through. */
+
+    case OP_PRUNE:
+    case OP_SKIP:
+    common->needs_start_ptr = TRUE;
+    cc += 1;
+    break;
+
+    case OP_SKIP_ARG:
+    common->control_head_ptr = 1;
+    common->has_skip_arg = TRUE;
+    cc += 1 + 2 + cc[1];
+    break;
+
+    default:
+    cc = next_opcode(common, cc);
+    if (cc == NULL)
+      return FALSE;
+    break;
+    }
+  }
+return TRUE;
+}
+
+static int get_class_iterator_size(pcre_uchar *cc)
+{
+switch(*cc)
+  {
+  case OP_CRSTAR:
+  case OP_CRPLUS:
+  return 2;
+
+  case OP_CRMINSTAR:
+  case OP_CRMINPLUS:
+  case OP_CRQUERY:
+  case OP_CRMINQUERY:
+  return 1;
+
+  case OP_CRRANGE:
+  case OP_CRMINRANGE:
+  if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
+    return 0;
+  return 2;
+
+  default:
+  return 0;
+  }
+}
+
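+/* Detects brackets repeated by duplication: for example, three identical
+copies of the same non-capturing bracket in a row are recorded as a single
+bracket repeated exactly three times (OP_EXACT), and trailing BRAZERO /
+BRAMINZERO wrapped copies are recorded as an OP_UPTO or OP_MINUPTO style
+repeat. */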
+static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
+{
+pcre_uchar *end = bracketend(begin);
+pcre_uchar *next;
+pcre_uchar *next_end;
+pcre_uchar *max_end;
+pcre_uchar type;
+sljit_sw length = end - begin;
+int min, max, i;
+
+/* Detect fixed iterations first. */
+if (end[-(1 + LINK_SIZE)] != OP_KET)
+  return FALSE;
+
+/* Already detected repeat. */
+if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
+  return TRUE;
+
+next = end;
+min = 1;
+while (1)
+  {
+  if (*next != *begin)
+    break;
+  next_end = bracketend(next);
+  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
+    break;
+  next = next_end;
+  min++;
+  }
+
+if (min == 2)
+  return FALSE;
+
+max = 0;
+max_end = next;
+if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
+  {
+  type = *next;
+  while (1)
+    {
+    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
+      break;
+    next_end = bracketend(next + 2 + LINK_SIZE);
+    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
+      break;
+    next = next_end;
+    max++;
+    }
+
+  if (next[0] == type && next[1] == *begin && max >= 1)
+    {
+    next_end = bracketend(next + 1);
+    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
+      {
+      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
+        if (*next_end != OP_KET)
+          break;
+
+      if (i == max)
+        {
+        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
+        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
+        /* +2 for the original and the last iteration. */
+        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
+        if (min == 1)
+          return TRUE;
+        min--;
+        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
+        }
+      }
+    }
+  }
+
+if (min >= 3)
+  {
+  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
+  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
+  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
+  return TRUE;
+  }
+
+return FALSE;
+}
+
+#define CASE_ITERATOR_PRIVATE_DATA_1 \
+    case OP_MINSTAR: \
+    case OP_MINPLUS: \
+    case OP_QUERY: \
+    case OP_MINQUERY: \
+    case OP_MINSTARI: \
+    case OP_MINPLUSI: \
+    case OP_QUERYI: \
+    case OP_MINQUERYI: \
+    case OP_NOTMINSTAR: \
+    case OP_NOTMINPLUS: \
+    case OP_NOTQUERY: \
+    case OP_NOTMINQUERY: \
+    case OP_NOTMINSTARI: \
+    case OP_NOTMINPLUSI: \
+    case OP_NOTQUERYI: \
+    case OP_NOTMINQUERYI:
+
+#define CASE_ITERATOR_PRIVATE_DATA_2A \
+    case OP_STAR: \
+    case OP_PLUS: \
+    case OP_STARI: \
+    case OP_PLUSI: \
+    case OP_NOTSTAR: \
+    case OP_NOTPLUS: \
+    case OP_NOTSTARI: \
+    case OP_NOTPLUSI:
+
+#define CASE_ITERATOR_PRIVATE_DATA_2B \
+    case OP_UPTO: \
+    case OP_MINUPTO: \
+    case OP_UPTOI: \
+    case OP_MINUPTOI: \
+    case OP_NOTUPTO: \
+    case OP_NOTMINUPTO: \
+    case OP_NOTUPTOI: \
+    case OP_NOTMINUPTOI:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
+    case OP_TYPEMINSTAR: \
+    case OP_TYPEMINPLUS: \
+    case OP_TYPEQUERY: \
+    case OP_TYPEMINQUERY:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
+    case OP_TYPESTAR: \
+    case OP_TYPEPLUS:
+
+#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
+    case OP_TYPEUPTO: \
+    case OP_TYPEMINUPTO:
+
+static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
+{
+pcre_uchar *cc = common->start;
+pcre_uchar *alternative;
+pcre_uchar *end = NULL;
+int private_data_ptr = *private_data_start;
+int space, size, bracketlen;
+
+while (cc < ccend)
+  {
+  space = 0;
+  size = 0;
+  bracketlen = 0;
+  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
+    return;
+
+  if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
+    if (detect_repeat(common, cc))
+      {
+      /* These brackets are converted to repeats, so no globally
+      stored single character repeat is allowed inside them. */
+      if (cc >= end)
+        end = bracketend(cc);
+      }
+
+  switch(*cc)
+    {
+    case OP_KET:
+    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
+      {
+      common->private_data_ptrs[cc - common->start] = private_data_ptr;
+      private_data_ptr += sizeof(sljit_sw);
+      cc += common->private_data_ptrs[cc + 1 - common->start];
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRAPOS:
+    case OP_SBRA:
+    case OP_SBRAPOS:
+    case OP_SCOND:
+    common->private_data_ptrs[cc - common->start] = private_data_ptr;
+    private_data_ptr += sizeof(sljit_sw);
+    bracketlen = 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRAPOS:
+    case OP_SCBRAPOS:
+    common->private_data_ptrs[cc - common->start] = private_data_ptr;
+    private_data_ptr += sizeof(sljit_sw);
+    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_COND:
+    /* Might be a hidden SCOND. */
+    alternative = cc + GET(cc, 1);
+    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+      {
+      common->private_data_ptrs[cc - common->start] = private_data_ptr;
+      private_data_ptr += sizeof(sljit_sw);
+      }
+    bracketlen = 1 + LINK_SIZE;
+    break;
+
+    case OP_BRA:
+    bracketlen = 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_SCBRA:
+    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_1
+    space = 1;
+    size = -2;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2A
+    space = 2;
+    size = -2;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2B
+    space = 2;
+    size = -(2 + IMM2_SIZE);
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+    space = 1;
+    size = 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
+      space = 2;
+    size = 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
+      space = 2;
+    size = 1 + IMM2_SIZE;
+    break;
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    size += 1 + 32 / sizeof(pcre_uchar);
+    space = get_class_iterator_size(cc + size);
+    break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS:
+    size = GET(cc, 1);
+    space = get_class_iterator_size(cc + size);
+    break;
+#endif
+
+    default:
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+
+  /* Character iterators which are not inside a repeated bracket
+     get a private slot instead of allocating space on the stack. */
+  if (space > 0 && cc >= end)
+    {
+    common->private_data_ptrs[cc - common->start] = private_data_ptr;
+    private_data_ptr += sizeof(sljit_sw) * space;
+    }
+
+  if (size != 0)
+    {
+    if (size < 0)
+      {
+      cc += -size;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      }
+    else
+      cc += size;
+    }
+
+  if (bracketlen > 0)
+    {
+    if (cc >= end)
+      {
+      end = bracketend(cc);
+      if (end[-1 - LINK_SIZE] == OP_KET)
+        end = NULL;
+      }
+    cc += bracketlen;
+    }
+  }
+*private_data_start = private_data_ptr;
+}
+
+/* Returns a frame_types value (always < 0) if no frame is needed. */
+static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
+{
+int length = 0;
+int possessive = 0;
+BOOL stack_restore = FALSE;
+BOOL setsom_found = recursive;
+BOOL setmark_found = recursive;
+/* The last capture is a local variable even for recursions. */
+BOOL capture_last_found = FALSE;
+
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+SLJIT_ASSERT(common->control_head_ptr != 0);
+*needs_control_head = TRUE;
+#else
+*needs_control_head = FALSE;
+#endif
+
+if (ccend == NULL)
+  {
+  ccend = bracketend(cc) - (1 + LINK_SIZE);
+  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
+    {
+    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
+    /* This is correct regardless of common->capture_last_ptr. */
+    capture_last_found = TRUE;
+    }
+  cc = next_opcode(common, cc);
+  }
+
+SLJIT_ASSERT(cc != NULL);
+while (cc < ccend)
+  switch(*cc)
+    {
+    case OP_SET_SOM:
+    SLJIT_ASSERT(common->has_set_som);
+    stack_restore = TRUE;
+    if (!setsom_found)
+      {
+      length += 2;
+      setsom_found = TRUE;
+      }
+    cc += 1;
+    break;
+
+    case OP_MARK:
+    case OP_PRUNE_ARG:
+    case OP_THEN_ARG:
+    SLJIT_ASSERT(common->mark_ptr != 0);
+    stack_restore = TRUE;
+    if (!setmark_found)
+      {
+      length += 2;
+      setmark_found = TRUE;
+      }
+    if (common->control_head_ptr != 0)
+      *needs_control_head = TRUE;
+    cc += 1 + 2 + cc[1];
+    break;
+
+    case OP_RECURSE:
+    stack_restore = TRUE;
+    if (common->has_set_som && !setsom_found)
+      {
+      length += 2;
+      setsom_found = TRUE;
+      }
+    if (common->mark_ptr != 0 && !setmark_found)
+      {
+      length += 2;
+      setmark_found = TRUE;
+      }
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      length += 2;
+      capture_last_found = TRUE;
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRA:
+    case OP_SCBRAPOS:
+    stack_restore = TRUE;
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      length += 2;
+      capture_last_found = TRUE;
+      }
+    length += 3;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    default:
+    stack_restore = TRUE;
+    /* Fall through. */
+
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    case OP_NOT_DIGIT:
+    case OP_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+    case OP_ANYBYTE:
+    case OP_NOTPROP:
+    case OP_PROP:
+    case OP_ANYNL:
+    case OP_NOT_HSPACE:
+    case OP_HSPACE:
+    case OP_NOT_VSPACE:
+    case OP_VSPACE:
+    case OP_EXTUNI:
+    case OP_EODN:
+    case OP_EOD:
+    case OP_CIRC:
+    case OP_CIRCM:
+    case OP_DOLL:
+    case OP_DOLLM:
+    case OP_CHAR:
+    case OP_CHARI:
+    case OP_NOT:
+    case OP_NOTI:
+
+    case OP_EXACT:
+    case OP_POSSTAR:
+    case OP_POSPLUS:
+    case OP_POSQUERY:
+    case OP_POSUPTO:
+
+    case OP_EXACTI:
+    case OP_POSSTARI:
+    case OP_POSPLUSI:
+    case OP_POSQUERYI:
+    case OP_POSUPTOI:
+
+    case OP_NOTEXACT:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSUPTO:
+
+    case OP_NOTEXACTI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTPOSQUERYI:
+    case OP_NOTPOSUPTOI:
+
+    case OP_TYPEEXACT:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEPOSQUERY:
+    case OP_TYPEPOSUPTO:
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    case OP_XCLASS:
+
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+
+/* Possessive quantifiers can use a special case. */
+if (SLJIT_UNLIKELY(possessive == length))
+  return stack_restore ? no_frame : no_stack;
+
+if (length > 0)
+  return length + 1;
+return stack_restore ? no_frame : no_stack;
+}
+
+static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
+{
+DEFINE_COMPILER;
+BOOL setsom_found = recursive;
+BOOL setmark_found = recursive;
+/* The last capture is a local variable even for recursions. */
+BOOL capture_last_found = FALSE;
+int offset;
+
+/* >= 1 + shortest item size (2) */
+SLJIT_UNUSED_ARG(stacktop);
+SLJIT_ASSERT(stackpos >= stacktop + 2);
+
+stackpos = STACK(stackpos);
+if (ccend == NULL)
+  {
+  ccend = bracketend(cc) - (1 + LINK_SIZE);
+  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
+    cc = next_opcode(common, cc);
+  }
+
+SLJIT_ASSERT(cc != NULL);
+while (cc < ccend)
+  switch(*cc)
+    {
+    case OP_SET_SOM:
+    SLJIT_ASSERT(common->has_set_som);
+    if (!setsom_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setsom_found = TRUE;
+      }
+    cc += 1;
+    break;
+
+    case OP_MARK:
+    case OP_PRUNE_ARG:
+    case OP_THEN_ARG:
+    SLJIT_ASSERT(common->mark_ptr != 0);
+    if (!setmark_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setmark_found = TRUE;
+      }
+    cc += 1 + 2 + cc[1];
+    break;
+
+    case OP_RECURSE:
+    if (common->has_set_som && !setsom_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setsom_found = TRUE;
+      }
+    if (common->mark_ptr != 0 && !setmark_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      setmark_found = TRUE;
+      }
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      capture_last_found = TRUE;
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRA:
+    case OP_SCBRAPOS:
+    if (common->capture_last_ptr != 0 && !capture_last_found)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
+      stackpos += (int)sizeof(sljit_sw);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+      stackpos += (int)sizeof(sljit_sw);
+      capture_last_found = TRUE;
+      }
+    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
+    stackpos += (int)sizeof(sljit_sw);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+    stackpos += (int)sizeof(sljit_sw);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
+    stackpos += (int)sizeof(sljit_sw);
+
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    default:
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
+SLJIT_ASSERT(stackpos == STACK(stacktop));
+}
+
+static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
+{
+int private_data_length = needs_control_head ? 3 : 2;
+int size;
+pcre_uchar *alternative;
+/* Calculate the sum of the private machine words. */
+while (cc < ccend)
+  {
+  size = 0;
+  switch(*cc)
+    {
+    case OP_KET:
+    if (PRIVATE_DATA(cc) != 0)
+      {
+      private_data_length++;
+      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
+      cc += PRIVATE_DATA(cc + 1);
+      }
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRAPOS:
+    case OP_SBRA:
+    case OP_SBRAPOS:
+    case OP_SCOND:
+    private_data_length++;
+    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
+    cc += 1 + LINK_SIZE;
+    break;
+
+    case OP_CBRA:
+    case OP_SCBRA:
+    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+      private_data_length++;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_CBRAPOS:
+    case OP_SCBRAPOS:
+    private_data_length += 2;
+    cc += 1 + LINK_SIZE + IMM2_SIZE;
+    break;
+
+    case OP_COND:
+    /* Might be a hidden SCOND. */
+    alternative = cc + GET(cc, 1);
+    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+      private_data_length++;
+    cc += 1 + LINK_SIZE;
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_1
+    if (PRIVATE_DATA(cc))
+      private_data_length++;
+    cc += 2;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2A
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 2;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    CASE_ITERATOR_PRIVATE_DATA_2B
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+    if (PRIVATE_DATA(cc))
+      private_data_length++;
+    cc += 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 1;
+    break;
+
+    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+    if (PRIVATE_DATA(cc))
+      private_data_length += 2;
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_CLASS:
+    case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS:
+    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
+#else
+    size = 1 + 32 / (int)sizeof(pcre_uchar);
+#endif
+    if (PRIVATE_DATA(cc))
+      private_data_length += get_class_iterator_size(cc + size);
+    cc += size;
+    break;
+
+    default:
+    cc = next_opcode(common, cc);
+    SLJIT_ASSERT(cc != NULL);
+    break;
+    }
+  }
+SLJIT_ASSERT(cc == ccend);
+return private_data_length;
+}
+
+static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
+  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
+{
+DEFINE_COMPILER;
+int srcw[2];
+int count, size;
+BOOL tmp1next = TRUE;
+BOOL tmp1empty = TRUE;
+BOOL tmp2empty = TRUE;
+pcre_uchar *alternative;
+enum {
+  start,
+  loop,
+  end
+} status;
+
+status = save ? start : loop;
+stackptr = STACK(stackptr - 2);
+stacktop = STACK(stacktop - 1);
+
+if (!save)
+  {
+  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
+  if (stackptr < stacktop)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+    stackptr += sizeof(sljit_sw);
+    tmp1empty = FALSE;
+    }
+  if (stackptr < stacktop)
+    {
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+    stackptr += sizeof(sljit_sw);
+    tmp2empty = FALSE;
+    }
+  /* tmp1next must be TRUE either way. */
+  }
+
+do
+  {
+  count = 0;
+  switch(status)
+    {
+    case start:
+    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
+    count = 1;
+    srcw[0] = common->recursive_head_ptr;
+    if (needs_control_head)
+      {
+      SLJIT_ASSERT(common->control_head_ptr != 0);
+      count = 2;
+      srcw[1] = common->control_head_ptr;
+      }
+    status = loop;
+    break;
+
+    case loop:
+    if (cc >= ccend)
+      {
+      status = end;
+      break;
+      }
+
+    switch(*cc)
+      {
+      case OP_KET:
+      if (PRIVATE_DATA(cc) != 0)
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
+        cc += PRIVATE_DATA(cc + 1);
+        }
+      cc += 1 + LINK_SIZE;
+      break;
+
+      case OP_ASSERT:
+      case OP_ASSERT_NOT:
+      case OP_ASSERTBACK:
+      case OP_ASSERTBACK_NOT:
+      case OP_ONCE:
+      case OP_ONCE_NC:
+      case OP_BRAPOS:
+      case OP_SBRA:
+      case OP_SBRAPOS:
+      case OP_SCOND:
+      count = 1;
+      srcw[0] = PRIVATE_DATA(cc);
+      SLJIT_ASSERT(srcw[0] != 0);
+      cc += 1 + LINK_SIZE;
+      break;
+
+      case OP_CBRA:
+      case OP_SCBRA:
+      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+        {
+        count = 1;
+        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+        }
+      cc += 1 + LINK_SIZE + IMM2_SIZE;
+      break;
+
+      case OP_CBRAPOS:
+      case OP_SCBRAPOS:
+      count = 2;
+      srcw[0] = PRIVATE_DATA(cc);
+      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
+      cc += 1 + LINK_SIZE + IMM2_SIZE;
+      break;
+
+      case OP_COND:
+      /* Might be a hidden SCOND. */
+      alternative = cc + GET(cc, 1);
+      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        SLJIT_ASSERT(srcw[0] != 0);
+        }
+      cc += 1 + LINK_SIZE;
+      break;
+
+      CASE_ITERATOR_PRIVATE_DATA_1
+      if (PRIVATE_DATA(cc))
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        }
+      cc += 2;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      break;
+
+      CASE_ITERATOR_PRIVATE_DATA_2A
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+        }
+      cc += 2;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      break;
+
+      CASE_ITERATOR_PRIVATE_DATA_2B
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+        }
+      cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+      break;
+
+      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+      if (PRIVATE_DATA(cc))
+        {
+        count = 1;
+        srcw[0] = PRIVATE_DATA(cc);
+        }
+      cc += 1;
+      break;
+
+      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = srcw[0] + sizeof(sljit_sw);
+        }
+      cc += 1;
+      break;
+
+      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+      if (PRIVATE_DATA(cc))
+        {
+        count = 2;
+        srcw[0] = PRIVATE_DATA(cc);
+        srcw[1] = srcw[0] + sizeof(sljit_sw);
+        }
+      cc += 1 + IMM2_SIZE;
+      break;
+
+      case OP_CLASS:
+      case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+      case OP_XCLASS:
+      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
+#else
+      size = 1 + 32 / (int)sizeof(pcre_uchar);
+#endif
+      if (PRIVATE_DATA(cc))
+        switch(get_class_iterator_size(cc + size))
+          {
+          case 1:
+          count = 1;
+          srcw[0] = PRIVATE_DATA(cc);
+          break;
+
+          case 2:
+          count = 2;
+          srcw[0] = PRIVATE_DATA(cc);
+          srcw[1] = srcw[0] + sizeof(sljit_sw);
+          break;
+
+          default:
+          SLJIT_ASSERT_STOP();
+          break;
+          }
+      cc += size;
+      break;
+
+      default:
+      cc = next_opcode(common, cc);
+      SLJIT_ASSERT(cc != NULL);
+      break;
+      }
+    break;
+
+    case end:
+    SLJIT_ASSERT_STOP();
+    break;
+    }
+
+  while (count > 0)
+    {
+    count--;
+    if (save)
+      {
+      if (tmp1next)
+        {
+        if (!tmp1empty)
+          {
+          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
+          stackptr += sizeof(sljit_sw);
+          }
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
+        tmp1empty = FALSE;
+        tmp1next = FALSE;
+        }
+      else
+        {
+        if (!tmp2empty)
+          {
+          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
+          stackptr += sizeof(sljit_sw);
+          }
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
+        tmp2empty = FALSE;
+        tmp1next = TRUE;
+        }
+      }
+    else
+      {
+      if (tmp1next)
+        {
+        SLJIT_ASSERT(!tmp1empty);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
+        tmp1empty = stackptr >= stacktop;
+        if (!tmp1empty)
+          {
+          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+          stackptr += sizeof(sljit_sw);
+          }
+        tmp1next = FALSE;
+        }
+      else
+        {
+        SLJIT_ASSERT(!tmp2empty);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
+        tmp2empty = stackptr >= stacktop;
+        if (!tmp2empty)
+          {
+          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
+          stackptr += sizeof(sljit_sw);
+          }
+        tmp1next = TRUE;
+        }
+      }
+    }
+  }
+while (status != end);
+
+if (save)
+  {
+  if (tmp1next)
+    {
+    if (!tmp1empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    if (!tmp2empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    }
+  else
+    {
+    if (!tmp2empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    if (!tmp1empty)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
+      stackptr += sizeof(sljit_sw);
+      }
+    }
+  }
+SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
+}
+
+static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
+{
+pcre_uchar *end = bracketend(cc);
+BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
+
+/* Assert captures then. */
+if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
+  current_offset = NULL;
+/* Conditional block does not. */
+if (*cc == OP_COND || *cc == OP_SCOND)
+  has_alternatives = FALSE;
+
+cc = next_opcode(common, cc);
+if (has_alternatives)
+  current_offset = common->then_offsets + (cc - common->start);
+
+while (cc < end)
+  {
+  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
+    cc = set_then_offsets(common, cc, current_offset);
+  else
+    {
+    if (*cc == OP_ALT && has_alternatives)
+      current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
+    if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
+      *current_offset = 1;
+    cc = next_opcode(common, cc);
+    }
+  }
+
+return end;
+}
+
+#undef CASE_ITERATOR_PRIVATE_DATA_1
+#undef CASE_ITERATOR_PRIVATE_DATA_2A
+#undef CASE_ITERATOR_PRIVATE_DATA_2B
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+
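+/* A power of two has a single set bit, so clearing its lowest set bit
+(value & (value - 1)) yields zero: 8 & 7 == 0, while 6 & 5 != 0. Note that
+the test is also TRUE for zero. */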
+static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
+{
+return (value & (value - 1)) == 0;
+}
+
+static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
+{
+while (list)
+  {
+  /* sljit_set_label is clever enough to do nothing
+  if either the jump or the label is NULL. */
+  SET_LABEL(list->jump, label);
+  list = list->next;
+  }
+}
+
+static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
+{
+jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
+if (list_item)
+  {
+  list_item->next = *list;
+  list_item->jump = jump;
+  *list = list_item;
+  }
+}
+
+static void add_stub(compiler_common *common, struct sljit_jump *start)
+{
+DEFINE_COMPILER;
+stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
+
+if (list_item)
+  {
+  list_item->start = start;
+  list_item->quit = LABEL();
+  list_item->next = common->stubs;
+  common->stubs = list_item;
+  }
+}
+
+static void flush_stubs(compiler_common *common)
+{
+DEFINE_COMPILER;
+stub_list *list_item = common->stubs;
+
+while (list_item)
+  {
+  JUMPHERE(list_item->start);
+  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
+  JUMPTO(SLJIT_JUMP, list_item->quit);
+  list_item = list_item->next;
+  }
+common->stubs = NULL;
+}
+
+static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
+{
+DEFINE_COMPILER;
+label_addr_list *label_addr;
+
+label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
+if (label_addr == NULL)
+  return;
+label_addr->label = LABEL();
+label_addr->update_addr = update_addr;
+label_addr->next = common->label_addrs;
+common->label_addrs = label_addr;
+}
+
+static SLJIT_INLINE void count_match(compiler_common *common)
+{
+DEFINE_COMPILER;
+
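+/* Decrement the remaining match count and jump to the match-limit handler
+when it reaches zero. */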
+OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
+add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
+}
+
+static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
+{
+/* May destroy all locals and registers except TMP2. */
+DEFINE_COMPILER;
+
+OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+#ifdef DESTROY_REGISTERS
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
+OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
+#endif
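+/* If the new stack top passed the limit, the stub recorded here (see
+flush_stubs) calls the stack allocator and then resumes at this point. */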
+add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
+}
+
+static SLJIT_INLINE void free_stack(compiler_common *common, int size)
+{
+DEFINE_COMPILER;
+OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+}
+
+static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
+{
+DEFINE_COMPILER;
+sljit_uw *result;
+
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  return NULL;
+
+result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
+if (SLJIT_UNLIKELY(result == NULL))
+  {
+  sljit_set_compiler_memory_error(compiler);
+  return NULL;
+  }
+
+*(void**)result = common->read_only_data_head;
+common->read_only_data_head = (void *)result;
+return result + 1;
+}
+
+static void free_read_only_data(void *current, void *allocator_data)
+{
+void *next;
+
+SLJIT_UNUSED_ARG(allocator_data);
+
+while (current != NULL)
+  {
+  next = *(void**)current;
+  SLJIT_FREE(current, allocator_data);
+  current = next;
+  }
+}
+
+static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+int i;
+
+/* At this point we can freely use all temporary registers. */
+SLJIT_ASSERT(length > 1);
+/* TMP1 returns with begin - 1. */
+OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
+if (length < 8)
+  {
+  for (i = 1; i < length; i++)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
+  }
+else
+  {
+  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
+  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
+  loop = LABEL();
+  OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_NOT_ZERO, loop);
+  }
+}
+
+static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+int i;
+
+SLJIT_ASSERT(length > 1);
+/* OVECTOR(1) contains the "string begin - 1" constant. */
+if (length > 2)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
+if (length < 8)
+  {
+  for (i = 2; i < length; i++)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
+  }
+else
+  {
+  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
+  loop = LABEL();
+  OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_NOT_ZERO, loop);
+  }
+
+OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
+if (common->control_head_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
+}
+
+static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
+{
+while (current != NULL)
+  {
+  switch (current[-2])
+    {
+    case type_then_trap:
+    break;
+
+    case type_mark:
+    if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
+      return current[-4];
+    break;
+
+    default:
+    SLJIT_ASSERT_STOP();
+    break;
+    }
+  current = (sljit_sw*)current[-1];
+  }
+return -1;
+}
+
+static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+struct sljit_jump *early_quit;
+
+/* At this point we can freely use all registers. */
+OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
+
+OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
+OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
+OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
+GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
+/* Unlikely, but possible */
+early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
+loop = LABEL();
+OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
+OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
+/* Copy the integer value to the output buffer */
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+JUMPTO(SLJIT_NOT_ZERO, loop);
+JUMPHERE(early_quit);
+
+/* Calculate the return value, which is the maximum ovector value. */
+if (topbracket > 1)
+  {
+  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
+
+  /* OVECTOR(0) is never equal to SLJIT_S2. */
+  loop = LABEL();
+  OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
+  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
+  }
+else
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
+}
+
+static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
+SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
+  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
+
+OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
+OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
+CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
+
+/* Store match begin and end. */
+OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
+OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
+
+jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
+OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
+JUMPHERE(jump);
+
+OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
+OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
+
+OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
+
+JUMPTO(SLJIT_JUMP, quit);
+}
+
+static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
+{
+/* May destroy TMP1. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  /* The value of -1 must be kept for start_used_ptr! */
+  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
+  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although
+  overwriting is unnecessary when start_used_ptr == STR_PTR, it does no harm. */
+  jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
+  {
+  jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+}
+
+static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
+{
+/* Detects if the character has an othercase. */
+unsigned int c;
+
+#ifdef SUPPORT_UTF
+if (common->utf)
+  {
+  GETCHAR(c, cc);
+  if (c > 127)
+    {
+#ifdef SUPPORT_UCP
+    return c != UCD_OTHERCASE(c);
+#else
+    return FALSE;
+#endif
+    }
+#ifndef COMPILE_PCRE8
+  return common->fcc[c] != c;
+#endif
+  }
+else
+#endif
+  c = *cc;
+return MAX_255(c) ? common->fcc[c] != c : FALSE;
+}
+
+static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
+{
+/* Returns with the othercase. */
+#ifdef SUPPORT_UTF
+if (common->utf && c > 127)
+  {
+#ifdef SUPPORT_UCP
+  return UCD_OTHERCASE(c);
+#else
+  return c;
+#endif
+  }
+#endif
+return TABLE_GET(c, common->fcc, c);
+}
+
+static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
+{
+/* Detects if the character and its othercase differ in exactly one bit. */
+unsigned int c, oc, bit;
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+int n;
+#endif
+
+#ifdef SUPPORT_UTF
+if (common->utf)
+  {
+  GETCHAR(c, cc);
+  if (c <= 127)
+    oc = common->fcc[c];
+  else
+    {
+#ifdef SUPPORT_UCP
+    oc = UCD_OTHERCASE(c);
+#else
+    oc = c;
+#endif
+    }
+  }
+else
+  {
+  c = *cc;
+  oc = TABLE_GET(c, common->fcc, c);
+  }
+#else
+c = *cc;
+oc = TABLE_GET(c, common->fcc, c);
+#endif
+
+SLJIT_ASSERT(c != oc);
+
+bit = c ^ oc;
+/* Optimized for the English alphabet. */
+if (c <= 127 && bit == 0x20)
+  return (0 << 8) | 0x20;
+
+/* Since c != oc, they must have at least 1 bit difference. */
+if (!is_powerof2(bit))
+  return 0;
+
+#if defined COMPILE_PCRE8
+
+#ifdef SUPPORT_UTF
+if (common->utf && c > 127)
+  {
+  n = GET_EXTRALEN(*cc);
+  while ((bit & 0x3f) == 0)
+    {
+    n--;
+    bit >>= 6;
+    }
+  return (n << 8) | bit;
+  }
+#endif /* SUPPORT_UTF */
+return (0 << 8) | bit;
+
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+
+#ifdef SUPPORT_UTF
+if (common->utf && c > 65535)
+  {
+  if (bit >= (1 << 10))
+    bit >>= 10;
+  else
+    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
+  }
+#endif /* SUPPORT_UTF */
+return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
+
+#endif /* COMPILE_PCRE[8|16|32] */
+}
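The ((extra_byte_index << 8) | bit) encoding returned above lets the compiled matcher handle a caseless character with a single OR and compare whenever the two case forms differ in one bit. A worked example under that assumption:

/* 'A' (0x41) and 'a' (0x61): bit = 0x41 ^ 0x61 = 0x20, a power of two. */
unsigned int c = 'A', oc = 'a';
unsigned int bit = c ^ oc;                    /* 0x20 */
unsigned int input = 'a';                     /* character read from the subject */
int matches = ((input | bit) == (c | bit));   /* true for both 'A' and 'a' */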
+
+static void check_partial(compiler_common *common, BOOL force)
+{
+/* Checks whether a partial match has occurred. Does not modify registers. */
+DEFINE_COMPILER;
+struct sljit_jump *jump = NULL;
+
+SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
+
+if (common->mode == JIT_COMPILE)
+  return;
+
+if (!force)
+  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
+
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+else
+  {
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+
+if (jump != NULL)
+  JUMPHERE(jump);
+}
+
+static void check_str_end(compiler_common *common, jump_list **end_reached)
+{
+/* Does not affect registers. Usually used in a tight spot. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == JIT_COMPILE)
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  return;
+  }
+
+jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
+  }
+else
+  {
+  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+JUMPHERE(jump);
+}
+
+static void detect_partial_match(compiler_common *common, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (common->mode == JIT_COMPILE)
+  {
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  return;
+  }
+
+/* Partial matching mode. */
+jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+  }
+else
+  {
+  if (common->partialmatchlabel != NULL)
+    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
+  else
+    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
+  }
+JUMPHERE(jump);
+}
+
+static void peek_char(compiler_common *common, pcre_uint32 max)
+{
+/* Reads the character into TMP1, keeps STR_PTR.
+Does not check STR_END. TMP2 is destroyed. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+struct sljit_jump *jump;
+#endif
+
+SLJIT_UNUSED_ARG(max);
+
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+if (common->utf)
+  {
+  if (max < 128) return;
+
+  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+  JUMPHERE(jump);
+  }
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+if (common->utf)
+  {
+  if (max < 0xd800) return;
+
+  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+  /* TMP2 contains the high surrogate. */
+  /* STR_PTR still points at the high surrogate; read the low one. */
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
+  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+  JUMPHERE(jump);
+  }
+#endif
+}
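In the COMPILE_PCRE16 branch, adding 0x40 to the high surrogate before the 10-bit shift is the standard pair-decoding formula with the 0x10000 offset pre-folded in (0x40 << 10 == 0x10000). Restated in plain C with a sample pair:

/* U+1F600 is encoded as the surrogate pair 0xd83d 0xde00. */
unsigned int hi = 0xd83d, lo = 0xde00;
unsigned int cp = ((hi - 0xd800 + 0x40) << 10) | (lo & 0x3ff);
/* Same as 0x10000 + ((hi - 0xd800) << 10) + (lo - 0xdc00); cp == 0x1f600. */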
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+
+static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
+{
+/* Tells whether the character codes below 128 are enough
+to determine a match. */
+const pcre_uint8 value = nclass ? 0xff : 0;
+const pcre_uint8 *end = bitset + 32;
+
+bitset += 16;
+do
+  {
+  if (*bitset++ != value)
+    return FALSE;
+  }
+while (bitset < end);
+return TRUE;
+}
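The loop above inspects bytes 16..31 of PCRE's 32-byte class bitmap, i.e. the bits for character codes 128..255 (one bit per code, eight codes per byte): all clear for a positive class, or all set for a negated one, means those codes can never change the outcome. The underlying bit addressing, as a sketch:

/* Test whether code c (0..255) is set in a 32-byte class bitmap. */
static int bit_is_set(const pcre_uint8 *bitset, unsigned int c)
{
return (bitset[c >> 3] >> (c & 7)) & 1;
}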
+
+static void read_char7_type(compiler_common *common, BOOL full_read)
+{
+/* Reads the precise character type of a character into TMP1, if the character
+is less than 128. Otherwise it returns with zero. Does not check STR_END. The
+full_read argument tells whether STR_PTR should be advanced past the whole,
+possibly multi-byte, character even when only its first byte was needed. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+SLJIT_ASSERT(common->utf);
+
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+
+if (full_read)
+  {
+  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+  JUMPHERE(jump);
+  }
+}
+
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
+static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
+{
+/* Reads the precise value of a character into TMP1, if the character is
+between min and max (c >= min && c <= max). Otherwise it returns with a value
+outside the range. Does not check STR_END. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+struct sljit_jump *jump;
+#endif
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+struct sljit_jump *jump2;
+#endif
+
+SLJIT_UNUSED_ARG(update_str_ptr);
+SLJIT_UNUSED_ARG(min);
+SLJIT_UNUSED_ARG(max);
+SLJIT_ASSERT(min <= max);
+
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+if (common->utf)
+  {
+  if (max < 128 && !update_str_ptr) return;
+
+  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+  if (min >= 0x10000)
+    {
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
+    if (update_str_ptr)
+      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
+    if (!update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    JUMPHERE(jump2);
+    if (update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
+    }
+  else if (min >= 0x800 && max <= 0xffff)
+    {
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
+    if (update_str_ptr)
+      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    if (!update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    JUMPHERE(jump2);
+    if (update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
+    }
+  else if (max >= 0x800)
+    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
+  else if (max < 128)
+    {
+    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+    }
+  else
+    {
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    if (!update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    else
+      OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    if (update_str_ptr)
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
+    }
+  JUMPHERE(jump);
+  }
+#endif
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+if (common->utf)
+  {
+  if (max >= 0x10000)
+    {
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+    /* TMP2 contains the high surrogate. */
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+    JUMPHERE(jump);
+    return;
+    }
+
+  if (max < 0xd800 && !update_str_ptr) return;
+
+  /* Skip low surrogate if necessary. */
+  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+  if (update_str_ptr)
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  if (max >= 0xd800)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
+  JUMPHERE(jump);
+  }
+#endif
+}
+
+static SLJIT_INLINE void read_char(compiler_common *common)
+{
+read_char_range(common, 0, READ_CHAR_MAX, TRUE);
+}
+
+static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
+{
+/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+struct sljit_jump *jump;
+#endif
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+struct sljit_jump *jump2;
+#endif
+
+SLJIT_UNUSED_ARG(update_str_ptr);
+
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+if (common->utf)
+  {
+  /* This read may be wasted in some situations, but
+  in most cases the value is needed. */
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
+  if (!update_str_ptr)
+    {
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+    JUMPHERE(jump2);
+    }
+  else
+    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
+  JUMPHERE(jump);
+  return;
+  }
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
+#if !defined COMPILE_PCRE8
+/* The ctypes array contains only 256 values. */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
+#endif
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+#if !defined COMPILE_PCRE8
+JUMPHERE(jump);
+#endif
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+if (common->utf && update_str_ptr)
+  {
+  /* Skip low surrogate if necessary. */
+  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
+  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  JUMPHERE(jump);
+  }
+#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
+}
+
+static void skip_char_back(compiler_common *common)
+{
+/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
+DEFINE_COMPILER;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#if defined COMPILE_PCRE8
+struct sljit_label *label;
+
+if (common->utf)
+  {
+  label = LABEL();
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
+  return;
+  }
+#elif defined COMPILE_PCRE16
+if (common->utf)
+  {
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  /* Skip low surrogate if necessary. */
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  return;
+  }
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+}
+
+static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
+{
+/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+if (nltype == NLTYPE_ANY)
+  {
+  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+  }
+else if (nltype == NLTYPE_ANYCRLF)
+  {
+  if (jumpifmatch)
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+    }
+  else
+    {
+    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+    JUMPHERE(jump);
+    }
+  }
+else
+  {
+  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
+  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
+  }
+}
+
+#ifdef SUPPORT_UTF
+
+#if defined COMPILE_PCRE8
+static void do_utfreadchar(compiler_common *common)
+{
+/* Fast decoding of a UTF-8 character. TMP1 contains the first byte
+of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+/* Searching for the first zero. */
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+jump = JUMP(SLJIT_NOT_ZERO);
+/* Two byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(jump);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+jump = JUMP(SLJIT_NOT_ZERO);
+/* Three byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+/* Four byte sequence. */
+JUMPHERE(jump);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
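The generated code above is the usual UTF-8 length dispatch, unrolled: six payload bits per trailing byte are folded into TMP1, and the bit left over from the length marker (0x800, then 0x10000) decides whether more bytes follow. An equivalent plain-C decoder, assuming valid input and a lead byte >= 0xc0 as the fast path does (it mirrors the result, not the exact instruction sequence):

static unsigned int utf8_decode(const unsigned char *p, int *len)
{
unsigned int c = p[0];
if (c < 0xe0)   /* two-byte sequence */
  { *len = 2; return ((c & 0x1f) << 6) | (p[1] & 0x3f); }
if (c < 0xf0)   /* three-byte sequence */
  { *len = 3; return ((c & 0x0f) << 12) | ((p[1] & 0x3f) << 6) | (p[2] & 0x3f); }
/* four-byte sequence */
*len = 4;
return ((c & 0x07) << 18) | ((p[1] & 0x3f) << 12) |
  ((p[2] & 0x3f) << 6) | (p[3] & 0x3f);
}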
+
+static void do_utfreadchar16(compiler_common *common)
+{
+/* Fast decoding of a UTF-8 character. TMP1 contains the first byte
+of the character (>= 0xc0). Return value in TMP1. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+/* Searching for the first zero. */
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+jump = JUMP(SLJIT_NOT_ZERO);
+/* Two byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(jump);
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
+/* This code runs only in 8 bit mode. No need to shift the value. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+/* Three byte sequence. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+static void do_utfreadtype8(compiler_common *common)
+{
+/* Fast decoding of a UTF-8 character type. TMP2 contains the first byte
+of the character (>= 0xc0). Return value in TMP1. */
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_jump *compare;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
+jump = JUMP(SLJIT_NOT_ZERO);
+/* Two byte sequence. */
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
+/* The upper 5 bits are known at this point. */
+compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
+OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(compare);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+/* We only have types for characters less than 256. */
+JUMPHERE(jump);
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#endif /* COMPILE_PCRE8 */
+
+#endif /* SUPPORT_UTF */
+
+#ifdef SUPPORT_UCP
+
+/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
+#define UCD_BLOCK_MASK 127
+#define UCD_BLOCK_SHIFT 7
+
+static void do_getucd(compiler_common *common)
+{
+/* Search the UCD record for the character that comes in TMP1.
+Returns chartype in TMP1 and UCD offset in TMP2. */
+DEFINE_COMPILER;
+
+SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
+OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+#endif
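do_getucd() JITs PCRE's two-stage Unicode property lookup: stage 1 maps the 128-character block number to a block index, stage 2 maps (block << 7) + (c & 127) to a record number, and the 8-byte ucd_record holds the character type. In C terms (the table names are PCRE internals, used by the code above; compare the GET_UCD macro in pcre_internal.h):

unsigned int c = 0x1f600;    /* sample code point */
unsigned int block = PRIV(ucd_stage1)[c >> UCD_BLOCK_SHIFT];
unsigned int rec = PRIV(ucd_stage2)[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)];
int chartype = PRIV(ucd_records)[rec].chartype;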
+
+static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
+{
+DEFINE_COMPILER;
+struct sljit_label *mainloop;
+struct sljit_label *newlinelabel = NULL;
+struct sljit_jump *start;
+struct sljit_jump *end = NULL;
+struct sljit_jump *nl = NULL;
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+struct sljit_jump *singlechar;
+#endif
+jump_list *newline = NULL;
+BOOL newlinecheck = FALSE;
+BOOL readuchar = FALSE;
+
+if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
+    common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
+  newlinecheck = TRUE;
+
+if (firstline)
+  {
+  /* Search for the end of the first line. */
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
+
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    mainloop = LABEL();
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
+    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
+    JUMPHERE(end);
+    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    }
+  else
+    {
+    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+    mainloop = LABEL();
+    /* Continual stores do not cause a data dependency. */
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
+    read_char_range(common, common->nlmin, common->nlmax, TRUE);
+    check_newlinechar(common, common->nltype, &newline, TRUE);
+    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
+    JUMPHERE(end);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
+    set_jumps(newline, LABEL());
+    }
+
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+  }
+
+start = JUMP(SLJIT_JUMP);
+
+if (newlinecheck)
+  {
+  newlinelabel = LABEL();
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  nl = JUMP(SLJIT_JUMP);
+  }
+
+mainloop = LABEL();
+
+/* Increasing STR_PTR here saves one jump in the most common case. */
+#ifdef SUPPORT_UTF
+if (common->utf) readuchar = TRUE;
+#endif
+if (newlinecheck) readuchar = TRUE;
+
+if (readuchar)
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+
+if (newlinecheck)
+  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+#if defined COMPILE_PCRE8
+if (common->utf)
+  {
+  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  JUMPHERE(singlechar);
+  }
+#elif defined COMPILE_PCRE16
+if (common->utf)
+  {
+  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  JUMPHERE(singlechar);
+  }
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
+JUMPHERE(start);
+
+if (newlinecheck)
+  {
+  JUMPHERE(end);
+  JUMPHERE(nl);
+  }
+
+return mainloop;
+}
+
+#define MAX_N_CHARS 16
+#define MAX_N_BYTES 8
+
+static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
+{
+pcre_uint8 len = bytes[0];
+int i;
+
+if (len == 255)
+  return;
+
+if (len == 0)
+  {
+  bytes[0] = 1;
+  bytes[1] = byte;
+  return;
+  }
+
+for (i = len; i > 0; i--)
+  if (bytes[i] == byte)
+    return;
+
+if (len >= MAX_N_BYTES - 1)
+  {
+  bytes[0] = 255;
+  return;
+  }
+
+len++;
+bytes[len] = byte;
+bytes[0] = len;
+}
+
+static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
+{
+/* Recursive function, which scans prefix literals. */
+BOOL last, any, caseless;
+int len, repeat, len_save, consumed = 0;
+pcre_uint32 chr, mask;
+pcre_uchar *alternative, *cc_save, *oc;
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+pcre_uchar othercase[8];
+#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
+pcre_uchar othercase[2];
+#else
+pcre_uchar othercase[1];
+#endif
+
+repeat = 1;
+while (TRUE)
+  {
+  last = TRUE;
+  any = FALSE;
+  caseless = FALSE;
+  switch (*cc)
+    {
+    case OP_CHARI:
+    caseless = TRUE;
+    case OP_CHAR:
+    last = FALSE;
+    cc++;
+    break;
+
+    case OP_SOD:
+    case OP_SOM:
+    case OP_SET_SOM:
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    case OP_EODN:
+    case OP_EOD:
+    case OP_CIRC:
+    case OP_CIRCM:
+    case OP_DOLL:
+    case OP_DOLLM:
+    /* Zero width assertions. */
+    cc++;
+    continue;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    cc = bracketend(cc);
+    continue;
+
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_POSPLUSI:
+    caseless = TRUE;
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_POSPLUS:
+    cc++;
+    break;
+
+    case OP_EXACTI:
+    caseless = TRUE;
+    case OP_EXACT:
+    repeat = GET2(cc, 1);
+    last = FALSE;
+    cc += 1 + IMM2_SIZE;
+    break;
+
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_POSQUERYI:
+    caseless = TRUE;
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_POSQUERY:
+    len = 1;
+    cc++;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
+#endif
+    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
+    if (max_chars == 0)
+      return consumed;
+    last = FALSE;
+    break;
+
+    case OP_KET:
+    cc += 1 + LINK_SIZE;
+    continue;
+
+    case OP_ALT:
+    cc += GET(cc, 1);
+    continue;
+
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRA:
+    case OP_BRAPOS:
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    alternative = cc + GET(cc, 1);
+    while (*alternative == OP_ALT)
+      {
+      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
+      if (max_chars == 0)
+        return consumed;
+      alternative += GET(alternative, 1);
+      }
+
+    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
+      cc += IMM2_SIZE;
+    cc += 1 + LINK_SIZE;
+    continue;
+
+    case OP_CLASS:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
+#endif
+    any = TRUE;
+    cc += 1 + 32 / sizeof(pcre_uchar);
+    break;
+
+    case OP_NCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc += 1 + 32 / sizeof(pcre_uchar);
+    break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc += GET(cc, 1);
+    break;
+#endif
+
+    case OP_DIGIT:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_WHITESPACE:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_WORDCHAR:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_NOT:
+    case OP_NOTI:
+    cc++;
+    /* Fall through. */
+    case OP_NOT_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+#ifdef SUPPORT_UCP
+    case OP_NOTPROP:
+    case OP_PROP:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    cc += 1 + 2;
+    break;
+#endif
+
+    case OP_TYPEEXACT:
+    repeat = GET2(cc, 1);
+    cc += 1 + IMM2_SIZE;
+    continue;
+
+    case OP_NOTEXACT:
+    case OP_NOTEXACTI:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
+    any = TRUE;
+    repeat = GET2(cc, 1);
+    cc += 1 + IMM2_SIZE + 1;
+    break;
+
+    default:
+    return consumed;
+    }
+
+  if (any)
+    {
+#if defined COMPILE_PCRE8
+    mask = 0xff;
+#elif defined COMPILE_PCRE16
+    mask = 0xffff;
+#elif defined COMPILE_PCRE32
+    mask = 0xffffffff;
+#else
+    SLJIT_ASSERT_STOP();
+#endif
+
+    do
+      {
+      chars[0] = mask;
+      chars[1] = mask;
+      bytes[0] = 255;
+
+      consumed++;
+      if (--max_chars == 0)
+        return consumed;
+      chars += 2;
+      bytes += MAX_N_BYTES;
+      }
+    while (--repeat > 0);
+
+    repeat = 1;
+    continue;
+    }
+
+  len = 1;
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
+#endif
+
+  if (caseless && char_has_othercase(common, cc))
+    {
+#ifdef SUPPORT_UTF
+    if (common->utf)
+      {
+      GETCHAR(chr, cc);
+      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
+        return consumed;
+      }
+    else
+#endif
+      {
+      chr = *cc;
+      othercase[0] = TABLE_GET(chr, common->fcc, chr);
+      }
+    }
+  else
+    caseless = FALSE;
+
+  len_save = len;
+  cc_save = cc;
+  while (TRUE)
+    {
+    oc = othercase;
+    do
+      {
+      chr = *cc;
+#ifdef COMPILE_PCRE32
+      if (SLJIT_UNLIKELY(chr == NOTACHAR))
+        return consumed;
+#endif
+      add_prefix_byte((pcre_uint8)chr, bytes);
+
+      mask = 0;
+      if (caseless)
+        {
+        add_prefix_byte((pcre_uint8)*oc, bytes);
+        mask = *cc ^ *oc;
+        chr |= mask;
+        }
+
+#ifdef COMPILE_PCRE32
+      if (chars[0] == NOTACHAR && chars[1] == 0)
+#else
+      if (chars[0] == NOTACHAR)
+#endif
+        {
+        chars[0] = chr;
+        chars[1] = mask;
+        }
+      else
+        {
+        mask |= chars[0] ^ chr;
+        chr |= mask;
+        chars[0] = chr;
+        chars[1] |= mask;
+        }
+
+      len--;
+      consumed++;
+      if (--max_chars == 0)
+        return consumed;
+      chars += 2;
+      bytes += MAX_N_BYTES;
+      cc++;
+      oc++;
+      }
+    while (len > 0);
+
+    if (--repeat == 0)
+      break;
+
+    len = len_save;
+    cc = cc_save;
+    }
+
+  repeat = 1;
+  if (last)
+    return consumed;
+  }
+}
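Each prefix position ends up in chars[] as a (value, mask) pair: mask bits are "don't care", the stored value has them forced on, and a subject character c is a candidate iff (c | mask) == value. Merging alternatives simply ORs their differing bits into the mask, so the test is deliberately conservative. A small example of the merge performed in the "else" branch above:

unsigned int value = 'a', mask = 0;   /* first alternative: 'a' (0x61) */
unsigned int chr = 'b';               /* second alternative: 'b' (0x62) */
mask |= value ^ chr;                  /* 0x03 */
value = chr | mask;                   /* 0x63 */
/* (c | mask) == value now accepts 'a' and 'b', and also the false
   positive 'c' (0x63); the full matcher rejects such positions later. */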
+
+static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+pcre_uint32 chars[MAX_N_CHARS * 2];
+pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
+pcre_uint8 ones[MAX_N_CHARS];
+int offsets[3];
+pcre_uint32 mask;
+pcre_uint8 *byte_set, *byte_set_end;
+int i, max, from;
+int range_right = -1, range_len = 3 - 1;
+sljit_ub *update_table = NULL;
+BOOL in_range;
+
+for (i = 0; i < MAX_N_CHARS; i++)
+  {
+  chars[i << 1] = NOTACHAR;
+  chars[(i << 1) + 1] = 0;
+  bytes[i * MAX_N_BYTES] = 0;
+  }
+
+max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
+
+if (max <= 1)
+  return FALSE;
+
+for (i = 0; i < max; i++)
+  {
+  mask = chars[(i << 1) + 1];
+  ones[i] = ones_in_half_byte[mask & 0xf];
+  mask >>= 4;
+  while (mask != 0)
+    {
+    ones[i] += ones_in_half_byte[mask & 0xf];
+    mask >>= 4;
+    }
+  }
+
+in_range = FALSE;
+from = 0;   /* Prevent compiler "uninitialized" warning */
+for (i = 0; i <= max; i++)
+  {
+  if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
+    {
+    range_len = i - from;
+    range_right = i - 1;
+    }
+
+  if (i < max && bytes[i * MAX_N_BYTES] < 255)
+    {
+    if (!in_range)
+      {
+      in_range = TRUE;
+      from = i;
+      }
+    }
+  else if (in_range)
+    in_range = FALSE;
+  }
+
+if (range_right >= 0)
+  {
+  update_table = (sljit_ub *)allocate_read_only_data(common, 256);
+  if (update_table == NULL)
+    return TRUE;
+  memset(update_table, IN_UCHARS(range_len), 256);
+
+  for (i = 0; i < range_len; i++)
+    {
+    byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
+    SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
+    byte_set_end = byte_set + byte_set[0];
+    byte_set++;
+    while (byte_set <= byte_set_end)
+      {
+      if (update_table[*byte_set] > IN_UCHARS(i))
+        update_table[*byte_set] = IN_UCHARS(i);
+      byte_set++;
+      }
+    }
+  }
+
+offsets[0] = -1;
+/* Scan forward. */
+for (i = 0; i < max; i++)
+  if (ones[i] <= 2)
+    {
+    offsets[0] = i;
+    break;
+    }
+
+if (offsets[0] < 0 && range_right < 0)
+  return FALSE;
+
+if (offsets[0] >= 0)
+  {
+  /* Scan backward. */
+  offsets[1] = -1;
+  for (i = max - 1; i > offsets[0]; i--)
+    if (ones[i] <= 2 && i != range_right)
+      {
+      offsets[1] = i;
+      break;
+      }
+
+  /* This case is handled better by fast_forward_first_char. */
+  if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
+    return FALSE;
+
+  offsets[2] = -1;
+  /* We only search for a middle character if there is no range check. */
+  if (offsets[1] >= 0 && range_right == -1)
+    {
+    /* Scan from middle. */
+    for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
+      if (ones[i] <= 2)
+        {
+        offsets[2] = i;
+        break;
+        }
+
+    if (offsets[2] == -1)
+      {
+      for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
+        if (ones[i] <= 2)
+          {
+          offsets[2] = i;
+          break;
+          }
+      }
+    }
+
+  SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
+  SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
+
+  chars[0] = chars[offsets[0] << 1];
+  chars[1] = chars[(offsets[0] << 1) + 1];
+  if (offsets[2] >= 0)
+    {
+    chars[2] = chars[offsets[2] << 1];
+    chars[3] = chars[(offsets[2] << 1) + 1];
+    }
+  if (offsets[1] >= 0)
+    {
+    chars[4] = chars[offsets[1] << 1];
+    chars[5] = chars[(offsets[1] << 1) + 1];
+    }
+  }
+
+max -= 1;
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
+  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
+  JUMPHERE(quit);
+  }
+else
+  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+
+#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+if (range_right >= 0)
+  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
+#endif
+
+start = LABEL();
+quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+
+SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
+
+if (range_right >= 0)
+  {
+#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
+#else
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
+#endif
+
+#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
+#else
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
+  }
+
+if (offsets[0] >= 0)
+  {
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
+  if (offsets[1] >= 0)
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+  if (chars[1] != 0)
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
+  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
+  if (offsets[2] >= 0)
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
+
+  if (offsets[1] >= 0)
+    {
+    if (chars[5] != 0)
+      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
+    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
+    }
+
+  if (offsets[2] >= 0)
+    {
+    if (chars[3] != 0)
+      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
+    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
+    }
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  }
+
+JUMPHERE(quit);
+
+if (firstline)
+  {
+  if (range_right >= 0)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+  if (range_right >= 0)
+    {
+    quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
+    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+    JUMPHERE(quit);
+    }
+  }
+else
+  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+return TRUE;
+}
+
+#undef MAX_N_CHARS
+#undef MAX_N_BYTES
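When a suitable byte range is found, update_table acts as a Horspool-style bad-character table: update_table[b] is how far the scan may safely advance when byte b appears at offset range_right, and 0 marks a candidate position. A plain-C sketch of the emitted skip loop (names assumed; 8-bit mode for simplicity):

const unsigned char *p = subject;   /* plays the role of STR_PTR */
unsigned int skip;
while (p < subject_end && (skip = update_table[p[range_right]]) != 0)
  p += skip;
/* skip == 0: a candidate position; the offsets[0..2] characters are then
   compared, and on mismatch the scan resumes one uchar further on. */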
+
+static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+struct sljit_jump *found;
+pcre_uchar oc, bit;
+
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  }
+
+start = LABEL();
+quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+
+oc = first_char;
+if (caseless)
+  {
+  oc = TABLE_GET(first_char, common->fcc, first_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+  if (first_char > 127 && common->utf)
+    oc = UCD_OTHERCASE(first_char);
+#endif
+  }
+if (first_char == oc)
+  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
+else
+  {
+  bit = first_char ^ oc;
+  if (is_powerof2(bit))
+    {
+    OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
+    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
+    }
+  else
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
+    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+    found = JUMP(SLJIT_NOT_ZERO);
+    }
+  }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_JUMP, start);
+JUMPHERE(found);
+JUMPHERE(quit);
+
+if (firstline)
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
+
+static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+struct sljit_jump *lastchar;
+struct sljit_jump *firstchar;
+struct sljit_jump *quit;
+struct sljit_jump *foundcr = NULL;
+struct sljit_jump *notfoundnl;
+jump_list *newline = NULL;
+
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  }
+
+if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+  {
+  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
+
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+  loop = LABEL();
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
+  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
+  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
+
+  JUMPHERE(quit);
+  JUMPHERE(firstchar);
+  JUMPHERE(lastchar);
+
+  if (firstline)
+    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+  return;
+  }
+
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
+skip_char_back(common);
+
+loop = LABEL();
+common->ff_newline_shortcut = loop;
+
+read_char_range(common, common->nlmin, common->nlmax, TRUE);
+lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
+  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+check_newlinechar(common, common->nltype, &newline, FALSE);
+set_jumps(newline, loop);
+
+if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
+  {
+  quit = JUMP(SLJIT_JUMP);
+  JUMPHERE(foundcr);
+  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  JUMPHERE(notfoundnl);
+  JUMPHERE(quit);
+  }
+JUMPHERE(lastchar);
+JUMPHERE(firstchar);
+
+if (firstline)
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
+
+static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
+
+static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+struct sljit_jump *found = NULL;
+jump_list *matches = NULL;
+#ifndef COMPILE_PCRE8
+struct sljit_jump *jump;
+#endif
+
+if (firstline)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  }
+
+start = LABEL();
+quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#ifdef SUPPORT_UTF
+if (common->utf)
+  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+#endif
+
+if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
+  {
+#ifndef COMPILE_PCRE8
+  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
+  JUMPHERE(jump);
+#endif
+  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+  found = JUMP(SLJIT_NOT_ZERO);
+  }
+
+#ifdef SUPPORT_UTF
+if (common->utf)
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+#endif
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#ifdef SUPPORT_UTF
+#if defined COMPILE_PCRE8
+if (common->utf)
+  {
+  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  }
+#elif defined COMPILE_PCRE16
+if (common->utf)
+  {
+  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  }
+#endif /* COMPILE_PCRE[8|16] */
+#endif /* SUPPORT_UTF */
+JUMPTO(SLJIT_JUMP, start);
+if (found != NULL)
+  JUMPHERE(found);
+if (matches != NULL)
+  set_jumps(matches, LABEL());
+JUMPHERE(quit);
+
+if (firstline)
+  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
+}
+
+static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
+{
+DEFINE_COMPILER;
+struct sljit_label *loop;
+struct sljit_jump *toolong;
+struct sljit_jump *alreadyfound;
+struct sljit_jump *found;
+struct sljit_jump *foundoc = NULL;
+struct sljit_jump *notfound;
+pcre_uint32 oc, bit;
+
+SLJIT_ASSERT(common->req_char_ptr != 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
+OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
+toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
+alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+
+if (has_firstchar)
+  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+else
+  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
+
+loop = LABEL();
+notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
+
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
+oc = req_char;
+if (caseless)
+  {
+  oc = TABLE_GET(req_char, common->fcc, req_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+  if (req_char > 127 && common->utf)
+    oc = UCD_OTHERCASE(req_char);
+#endif
+  }
+if (req_char == oc)
+  found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+else
+  {
+  bit = req_char ^ oc;
+  if (is_powerof2(bit))
+    {
+    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
+    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
+    }
+  else
+    {
+    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+    foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
+    }
+  }
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_JUMP, loop);
+
+JUMPHERE(found);
+if (foundoc)
+  JUMPHERE(foundoc);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
+JUMPHERE(alreadyfound);
+JUMPHERE(toolong);
+return notfound;
+}
+
+static void do_revertframes(compiler_common *common)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *mainloop;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
+GET_LOCAL_BASE(TMP3, 0, 0);
+
+/* Drop frames until we reach STACK_TOP. */
+mainloop = LABEL();
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
+OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
+jump = JUMP(SLJIT_SIG_LESS_EQUAL);
+
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+JUMPTO(SLJIT_JUMP, mainloop);
+
+JUMPHERE(jump);
+jump = JUMP(SLJIT_SIG_LESS);
+/* End of dropping frames. */
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+
+JUMPHERE(jump);
+OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
+JUMPTO(SLJIT_JUMP, mainloop);
+}
+
+static void check_wordboundary(compiler_common *common)
+{
+DEFINE_COMPILER;
+struct sljit_jump *skipread;
+jump_list *skipread_list = NULL;
+#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
+struct sljit_jump *jump;
+#endif
+
+SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
+
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+/* Get the type of the previous char, and store it in LOCALS1. */
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
+skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
+skip_char_back(common);
+check_start_used_ptr(common);
+read_char(common);
+
+/* Testing char type. */
+#ifdef SUPPORT_UCP
+if (common->use_ucp)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
+  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+  JUMPHERE(jump);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
+  }
+else
+#endif
+  {
+#ifndef COMPILE_PCRE8
+  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#elif defined SUPPORT_UTF
+  /* Here LOCALS1 has already been zeroed. */
+  jump = NULL;
+  if (common->utf)
+    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#endif /* COMPILE_PCRE8 */
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
+#ifndef COMPILE_PCRE8
+  JUMPHERE(jump);
+#elif defined SUPPORT_UTF
+  if (jump != NULL)
+    JUMPHERE(jump);
+#endif /* COMPILE_PCRE8 */
+  }
+JUMPHERE(skipread);
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
+check_str_end(common, &skipread_list);
+peek_char(common, READ_CHAR_MAX);
+
+/* Testing char type. This duplicates the code above. */
+#ifdef SUPPORT_UCP
+if (common->use_ucp)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
+  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+  JUMPHERE(jump);
+  }
+else
+#endif
+  {
+#ifndef COMPILE_PCRE8
+  /* TMP2 may be destroyed by peek_char. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
+  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#elif defined SUPPORT_UTF
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
+  jump = NULL;
+  if (common->utf)
+    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+#endif
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
+  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
+  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+#ifndef COMPILE_PCRE8
+  JUMPHERE(jump);
+#elif defined SUPPORT_UTF
+  if (jump != NULL)
+    JUMPHERE(jump);
+#endif /* COMPILE_PCRE8 */
+  }
+set_jumps(skipread_list, LABEL());
+
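+/* A word boundary exists when exactly one of the two neighbouring
+characters is a word character, that is, when the two type bits
+computed above differ. */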
+OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+}
+
+static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+int ranges[MAX_RANGE_SIZE];
+pcre_uint8 bit, cbit, all;
+int i, byte, length = 0;
+
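+/* Convert the 256-bit class bitmap into a list of boundary points
+(positions where the bit value flips). Classes with few boundaries
+can then be matched with a handful of compares instead of a table
+lookup. */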
+bit = bits[0] & 0x1;
+/* All bits will be zero or one (since bit is zero or one). */
+all = -bit;
+
+for (i = 0; i < 256; )
+  {
+  byte = i >> 3;
+  if ((i & 0x7) == 0 && bits[byte] == all)
+    i += 8;
+  else
+    {
+    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
+    if (cbit != bit)
+      {
+      if (length >= MAX_RANGE_SIZE)
+        return FALSE;
+      ranges[length] = i;
+      length++;
+      bit = cbit;
+      all = -cbit;
+      }
+    i++;
+    }
+  }
+
+if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
+  {
+  if (length >= MAX_RANGE_SIZE)
+    return FALSE;
+  ranges[length] = 256;
+  length++;
+  }
+
+if (length < 0 || length > 4)
+  return FALSE;
+
+bit = bits[0] & 0x1;
+if (invert) bit ^= 0x1;
+
+/* No character is accepted. */
+if (length == 0 && bit == 0)
+  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+
+switch(length)
+  {
+  case 0:
+  /* When bit != 0, all characters are accepted. */
+  return TRUE;
+
+  case 1:
+  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+  return TRUE;
+
+  case 2:
+  if (ranges[0] + 1 != ranges[1])
+    {
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+  return TRUE;
+
+  case 3:
+  if (bit != 0)
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
+    if (ranges[0] + 1 != ranges[1])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+      }
+    else
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+    return TRUE;
+    }
+
+  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
+  if (ranges[1] + 1 != ranges[2])
+    {
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
+  return TRUE;
+
+  case 4:
+  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
+      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
+      && is_powerof2(ranges[2] - ranges[0]))
+    {
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
+    if (ranges[2] + 1 != ranges[3])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
+      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
+      }
+    else
+      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
+    return TRUE;
+    }
+
+  if (bit != 0)
+    {
+    i = 0;
+    if (ranges[0] + 1 != ranges[1])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+      i = ranges[0];
+      }
+    else
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
+
+    if (ranges[2] + 1 != ranges[3])
+      {
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
+      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
+      }
+    else
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
+    return TRUE;
+    }
+
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
+  if (ranges[1] + 1 != ranges[2])
+    {
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
+  return TRUE;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  return FALSE;
+  }
+}
+
+static void check_anynewline(compiler_common *common)
+{
+/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
+DEFINE_COMPILER;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
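+/* After subtracting 0x0a, one unsigned compare covers the 0x0a-0x0d
+(LF, VT, FF, CR) range, and an equality test catches 0x85 (NEL).
+OR-ing bit 0 then lets a single equality test match both 0x2028 and
+0x2029 (LS and PS). */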
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+#ifdef COMPILE_PCRE8
+if (common->utf)
+  {
+#endif
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+#ifdef COMPILE_PCRE8
+  }
+#endif
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
+OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+static void check_hspace(compiler_common *common)
+{
+/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
+DEFINE_COMPILER;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
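+/* Tab, space and NBSP are checked first; in Unicode mode also OGHAM
+SPACE MARK, MONGOLIAN VOWEL SEPARATOR, the 0x2000-0x200a space range,
+NARROW NO-BREAK SPACE, MEDIUM MATHEMATICAL SPACE and IDEOGRAPHIC
+SPACE. */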
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
+OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+#ifdef COMPILE_PCRE8
+if (common->utf)
+  {
+#endif
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
+  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
+  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
+#ifdef COMPILE_PCRE8
+  }
+#endif
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
+OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+static void check_vspace(compiler_common *common)
+{
+/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
+DEFINE_COMPILER;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+#ifdef COMPILE_PCRE8
+if (common->utf)
+  {
+#endif
+  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+#ifdef COMPILE_PCRE8
+  }
+#endif
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
+OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#define CHAR1 STR_END
+#define CHAR2 STACK_TOP
+
+static void do_casefulcmp(compiler_common *common)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *label;
+
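+/* TMP2 holds the byte length to compare; TMP1 and STR_PTR are biased
+one code unit back so the loop can use pre-incremented (MOVU) loads. */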
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+label = LABEL();
+OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
+OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_NOT_ZERO, label);
+
+JUMPHERE(jump);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
+OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#define LCC_TABLE STACK_LIMIT
+
+static void do_caselesscmp(compiler_common *common)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *label;
+
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
+OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
+OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+label = LABEL();
+OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+#ifndef COMPILE_PCRE8
+jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
+#endif
+OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
+#ifndef COMPILE_PCRE8
+JUMPHERE(jump);
+jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
+#endif
+OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
+#ifndef COMPILE_PCRE8
+JUMPHERE(jump);
+#endif
+jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
+OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_NOT_ZERO, label);
+
+JUMPHERE(jump);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
+OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+}
+
+#undef LCC_TABLE
+#undef CHAR1
+#undef CHAR2
+
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
+
+static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
+{
+/* It would be inefficient to do this at the JIT level. */
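+/* Returns NULL on mismatch, (pcre_uchar *)1 when the subject is
+exhausted (a possible partial match), or the advanced subject pointer
+on success. */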
+pcre_uint32 c1, c2;
+const pcre_uchar *src2 = args->uchar_ptr;
+const pcre_uchar *end2 = args->end;
+const ucd_record *ur;
+const pcre_uint32 *pp;
+
+while (src1 < end1)
+  {
+  if (src2 >= end2)
+    return (pcre_uchar*)1;
+  GETCHARINC(c1, src1);
+  GETCHARINC(c2, src2);
+  ur = GET_UCD(c2);
+  if (c1 != c2 && c1 != c2 + ur->other_case)
+    {
+    pp = PRIV(ucd_caseless_sets) + ur->caseset;
+    for (;;)
+      {
+      if (c1 < *pp) return NULL;
+      if (c1 == *pp++) break;
+      }
+    }
+  }
+return src2;
+}
+
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
+
+static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
+    compare_context *context, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+unsigned int othercasebit = 0;
+pcre_uchar *othercasechar = NULL;
+#ifdef SUPPORT_UTF
+int utflength;
+#endif
+
+if (caseless && char_has_othercase(common, cc))
+  {
+  othercasebit = char_get_othercase_bit(common, cc);
+  SLJIT_ASSERT(othercasebit);
+  /* Extracting bit difference info. */
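+  /* char_get_othercase_bit returns the XOR bit in its low bits and
+  the index of the code unit where the two cases differ in its upper
+  bits; the shifts below decode this packed form. */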
+#if defined COMPILE_PCRE8
+  othercasechar = cc + (othercasebit >> 8);
+  othercasebit &= 0xff;
+#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  /* Note that this code only handles characters in the BMP. If there
+  ever are characters outside the BMP whose othercase differs in only one
+  bit from itself (there currently are none), this code will need to be
+  revised for COMPILE_PCRE32. */
+  othercasechar = cc + (othercasebit >> 9);
+  if ((othercasebit & 0x100) != 0)
+    othercasebit = (othercasebit & 0xff) << 8;
+  else
+    othercasebit &= 0xff;
+#endif /* COMPILE_PCRE[8|16|32] */
+  }
+
+if (context->sourcereg == -1)
+  {
+#if defined COMPILE_PCRE8
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  if (context->length >= 4)
+    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else if (context->length >= 2)
+    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else
+#endif
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#elif defined COMPILE_PCRE16
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  if (context->length >= 4)
+    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else
+#endif
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#elif defined COMPILE_PCRE32
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* COMPILE_PCRE[8|16|32] */
+  context->sourcereg = TMP2;
+  }
+
+#ifdef SUPPORT_UTF
+utflength = 1;
+if (common->utf && HAS_EXTRALEN(*cc))
+  utflength += GET_EXTRALEN(*cc);
+
+do
+  {
+#endif
+
+  context->length -= IN_UCHARS(1);
+#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
+
+  /* Unaligned read is supported. */
+  if (othercasebit != 0 && othercasechar == cc)
+    {
+    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
+    context->oc.asuchars[context->ucharptr] = othercasebit;
+    }
+  else
+    {
+    context->c.asuchars[context->ucharptr] = *cc;
+    context->oc.asuchars[context->ucharptr] = 0;
+    }
+  context->ucharptr++;
+
+#if defined COMPILE_PCRE8
+  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
+#else
+  if (context->ucharptr >= 2 || context->length == 0)
+#endif
+    {
+    if (context->length >= 4)
+      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+    else if (context->length >= 2)
+      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#if defined COMPILE_PCRE8
+    else if (context->length >= 1)
+      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* COMPILE_PCRE8 */
+    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
+
+    switch(context->ucharptr)
+      {
+      case 4 / sizeof(pcre_uchar):
+      if (context->oc.asint != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
+      break;
+
+      case 2 / sizeof(pcre_uchar):
+      if (context->oc.asushort != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
+      break;
+
+#ifdef COMPILE_PCRE8
+      case 1:
+      if (context->oc.asbyte != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
+      break;
+#endif
+
+      default:
+      SLJIT_ASSERT_STOP();
+      break;
+      }
+    context->ucharptr = 0;
+    }
+
+#else
+
+  /* Unaligned reads are not supported, or we are in 32-bit mode. */
+  if (context->length >= 1)
+    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+
+  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
+
+  if (othercasebit != 0 && othercasechar == cc)
+    {
+    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
+
+#endif
+
+  cc++;
+#ifdef SUPPORT_UTF
+  utflength--;
+  }
+while (utflength > 0);
+#endif
+
+return cc;
+}
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+
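+/* The character in TMP1 and the type in typereg are kept biased by a
+running offset, so consecutive comparisons can be done with small
+immediate values instead of reloading absolute ones. */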
+#define SET_TYPE_OFFSET(value) \
+  if ((value) != typeoffset) \
+    { \
+    if ((value) < typeoffset) \
+      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
+    else \
+      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
+    } \
+  typeoffset = (value);
+
+#define SET_CHAR_OFFSET(value) \
+  if ((value) != charoffset) \
+    { \
+    if ((value) < charoffset) \
+      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
+    else \
+      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
+    } \
+  charoffset = (value);
+
+static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+jump_list *found = NULL;
+jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
+sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
+struct sljit_jump *jump = NULL;
+pcre_uchar *ccbegin;
+int compares, invertcmp, numberofcmps;
+#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
+BOOL utf = common->utf;
+#endif
+
+#ifdef SUPPORT_UCP
+BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
+BOOL charsaved = FALSE;
+int typereg = TMP1, scriptreg = TMP1;
+const pcre_uint32 *other_cases;
+sljit_uw typeoffset;
+#endif
+
+/* Scanning the necessary info. */
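+/* This first pass computes the character range of the class and which
+UCD fields (type, script, character) the code-emitting pass below will
+need. */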
+cc++;
+ccbegin = cc;
+compares = 0;
+if (cc[-1] & XCL_MAP)
+  {
+  min = 0;
+  cc += 32 / sizeof(pcre_uchar);
+  }
+
+while (*cc != XCL_END)
+  {
+  compares++;
+  if (*cc == XCL_SINGLE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+    if (c > max) max = c;
+    if (c < min) min = c;
+#ifdef SUPPORT_UCP
+    needschar = TRUE;
+#endif
+    }
+  else if (*cc == XCL_RANGE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+    if (c < min) min = c;
+    GETCHARINCTEST(c, cc);
+    if (c > max) max = c;
+#ifdef SUPPORT_UCP
+    needschar = TRUE;
+#endif
+    }
+#ifdef SUPPORT_UCP
+  else
+    {
+    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
+    cc++;
+    if (*cc == PT_CLIST)
+      {
+      other_cases = PRIV(ucd_caseless_sets) + cc[1];
+      while (*other_cases != NOTACHAR)
+        {
+        if (*other_cases > max) max = *other_cases;
+        if (*other_cases < min) min = *other_cases;
+        other_cases++;
+        }
+      }
+    else
+      {
+      max = READ_CHAR_MAX;
+      min = 0;
+      }
+
+    switch(*cc)
+      {
+      case PT_ANY:
+      break;
+
+      case PT_LAMP:
+      case PT_GC:
+      case PT_PC:
+      case PT_ALNUM:
+      needstype = TRUE;
+      break;
+
+      case PT_SC:
+      needsscript = TRUE;
+      break;
+
+      case PT_SPACE:
+      case PT_PXSPACE:
+      case PT_WORD:
+      case PT_PXGRAPH:
+      case PT_PXPRINT:
+      case PT_PXPUNCT:
+      needstype = TRUE;
+      needschar = TRUE;
+      break;
+
+      case PT_CLIST:
+      case PT_UCNC:
+      needschar = TRUE;
+      break;
+
+      default:
+      SLJIT_ASSERT_STOP();
+      break;
+      }
+    cc += 2;
+    }
+#endif
+  }
+
+/* We are not necessarily in UTF mode, even in 8-bit mode. */
+cc = ccbegin;
+detect_partial_match(common, backtracks);
+read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
+
+if ((cc[-1] & XCL_HASPROP) == 0)
+  {
+  if ((cc[-1] & XCL_MAP) != 0)
+    {
+    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+    if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
+      {
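+      /* Bitmap test: computes cc[c >> 3] & (1 << (c & 0x7)) and jumps
+      to the found list when the bit is set. */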
+      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
+      }
+
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+    JUMPHERE(jump);
+
+    cc += 32 / sizeof(pcre_uchar);
+    }
+  else
+    {
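+    /* Single-compare range check: c is outside [min, max] exactly
+    when (c - min) > (max - min) as unsigned values. */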
+    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
+    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
+    }
+  }
+else if ((cc[-1] & XCL_MAP) != 0)
+  {
+  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+#ifdef SUPPORT_UCP
+  charsaved = TRUE;
+#endif
+  if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
+    {
+#ifdef COMPILE_PCRE8
+    SLJIT_ASSERT(common->utf);
+#endif
+    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+
+    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
+
+    JUMPHERE(jump);
+    }
+
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+  cc += 32 / sizeof(pcre_uchar);
+  }
+
+#ifdef SUPPORT_UCP
+/* Simple register allocation. TMP1 is preferred if possible. */
+if (needstype || needsscript)
+  {
+  if (needschar && !charsaved)
+    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  if (needschar)
+    {
+    if (needstype)
+      {
+      OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+      typereg = RETURN_ADDR;
+      }
+
+    if (needsscript)
+      scriptreg = TMP3;
+    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+    }
+  else if (needstype && needsscript)
+    scriptreg = TMP3;
+  /* In all other cases only one of them was specified, and that one can go in TMP1. */
+
+  if (needsscript)
+    {
+    if (scriptreg == TMP1)
+      {
+      OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
+      }
+    else
+      {
+      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
+      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+      OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
+      }
+    }
+  }
+#endif
+
+/* Generating code. */
+charoffset = 0;
+numberofcmps = 0;
+#ifdef SUPPORT_UCP
+typeoffset = 0;
+#endif
+
+while (*cc != XCL_END)
+  {
+  compares--;
+  invertcmp = (compares == 0 && list != backtracks);
+  jump = NULL;
+
+  if (*cc == XCL_SINGLE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+
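+    /* Consecutive single/range checks are batched: their results are
+    OR-ed together in TMP2, and one conditional jump is emitted for
+    the whole group. */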
+    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
+      numberofcmps++;
+      }
+    else if (numberofcmps > 0)
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      numberofcmps = 0;
+      }
+    else
+      {
+      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      numberofcmps = 0;
+      }
+    }
+  else if (*cc == XCL_RANGE)
+    {
+    cc ++;
+    GETCHARINCTEST(c, cc);
+    SET_CHAR_OFFSET(c);
+    GETCHARINCTEST(c, cc);
+
+    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
+      numberofcmps++;
+      }
+    else if (numberofcmps > 0)
+      {
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      numberofcmps = 0;
+      }
+    else
+      {
+      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      numberofcmps = 0;
+      }
+    }
+#ifdef SUPPORT_UCP
+  else
+    {
+    if (*cc == XCL_NOTPROP)
+      invertcmp ^= 0x1;
+    cc++;
+    switch(*cc)
+      {
+      case PT_ANY:
+      if (list != backtracks)
+        {
+        if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
+          continue;
+        }
+      else if (cc[-1] == XCL_NOTPROP)
+        continue;
+      jump = JUMP(SLJIT_JUMP);
+      break;
+
+      case PT_LAMP:
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_GC:
+      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
+      SET_TYPE_OFFSET(c);
+      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
+      break;
+
+      case PT_PC:
+      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
+      break;
+
+      case PT_SC:
+      jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
+      break;
+
+      case PT_SPACE:
+      case PT_PXSPACE:
+      SET_CHAR_OFFSET(9);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+      SET_TYPE_OFFSET(ucp_Zl);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_WORD:
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+      /* Fall through. */
+
+      case PT_ALNUM:
+      SET_TYPE_OFFSET(ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
+      SET_TYPE_OFFSET(ucp_Nd);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_CLIST:
+      other_cases = PRIV(ucd_caseless_sets) + cc[1];
+
+      /* At least three characters are required.
+         Otherwise this case would be handled by the normal code path. */
+      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
+      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
+
+      /* Optimizing character pairs, if their difference is a power of 2. */
+      if (is_powerof2(other_cases[1] ^ other_cases[0]))
+        {
+        if (charoffset == 0)
+          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
+        else
+          {
+          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
+          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
+          }
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
+        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+        other_cases += 2;
+        }
+      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
+        {
+        if (charoffset == 0)
+          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
+        else
+          {
+          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
+          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
+          }
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
+        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
+        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+        other_cases += 3;
+        }
+      else
+        {
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+        }
+
+      while (*other_cases != NOTACHAR)
+        {
+        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+        }
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_UCNC:
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+      SET_CHAR_OFFSET(0xa0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      SET_CHAR_OFFSET(0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      break;
+
+      case PT_PXGRAPH:
+      /* C and Z groups are the farthest two groups. */
+      SET_TYPE_OFFSET(ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
+
+      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+
+      /* In case of ucp_Cf, we overwrite the result. */
+      SET_CHAR_OFFSET(0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+      JUMPHERE(jump);
+      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+      break;
+
+      case PT_PXPRINT:
+      /* C and Z groups are the farthest two groups. */
+      SET_TYPE_OFFSET(ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
+      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+
+      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+
+      /* In case of ucp_Cf, we overwrite the result. */
+      SET_CHAR_OFFSET(0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+      OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+
+      JUMPHERE(jump);
+      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+      break;
+
+      case PT_PXPUNCT:
+      SET_TYPE_OFFSET(ucp_Sc);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+
+      SET_CHAR_OFFSET(0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
+      OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+
+      SET_TYPE_OFFSET(ucp_Pc);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+      break;
+      }
+    cc += 2;
+    }
+#endif
+
+  if (jump != NULL)
+    add_jump(compiler, compares > 0 ? list : backtracks, jump);
+  }
+
+if (found != NULL)
+  set_jumps(found, LABEL());
+}
+
+#undef SET_TYPE_OFFSET
+#undef SET_CHAR_OFFSET
+
+#endif
+
+static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+int length;
+unsigned int c, oc, bit;
+compare_context context;
+struct sljit_jump *jump[4];
+jump_list *end_list;
+#ifdef SUPPORT_UTF
+struct sljit_label *label;
+#ifdef SUPPORT_UCP
+pcre_uchar propdata[5];
+#endif
+#endif /* SUPPORT_UTF */
+
+switch(type)
+  {
+  case OP_SOD:
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
+  return cc;
+
+  case OP_SOM:
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
+  return cc;
+
+  case OP_NOT_WORD_BOUNDARY:
+  case OP_WORD_BOUNDARY:
+  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+  return cc;
+
+  case OP_NOT_DIGIT:
+  case OP_DIGIT:
+  /* Digits are usually 0-9, so it is worth optimizing them. */
+  detect_partial_match(common, backtracks);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
+    read_char7_type(common, type == OP_NOT_DIGIT);
+  else
+#endif
+    read_char8_type(common, type == OP_NOT_DIGIT);
+    /* Flip the starting bit in the negative case. */
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
+  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
+  return cc;
+
+  case OP_NOT_WHITESPACE:
+  case OP_WHITESPACE:
+  detect_partial_match(common, backtracks);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
+    read_char7_type(common, type == OP_NOT_WHITESPACE);
+  else
+#endif
+    read_char8_type(common, type == OP_NOT_WHITESPACE);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
+  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
+  return cc;
+
+  case OP_NOT_WORDCHAR:
+  case OP_WORDCHAR:
+  detect_partial_match(common, backtracks);
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
+    read_char7_type(common, type == OP_NOT_WORDCHAR);
+  else
+#endif
+    read_char8_type(common, type == OP_NOT_WORDCHAR);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
+  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
+  return cc;
+
+  case OP_ANY:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, common->nlmin, common->nlmax, TRUE);
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+    end_list = NULL;
+    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
+      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+    else
+      check_str_end(common, &end_list);
+
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
+    set_jumps(end_list, LABEL());
+    JUMPHERE(jump[0]);
+    }
+  else
+    check_newlinechar(common, common->nltype, backtracks, TRUE);
+  return cc;
+
+  case OP_ALLANY:
+  detect_partial_match(common, backtracks);
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
+#if defined COMPILE_PCRE8
+    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#elif defined COMPILE_PCRE16
+    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#endif
+    JUMPHERE(jump[0]);
+#endif /* COMPILE_PCRE[8|16] */
+    return cc;
+    }
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  return cc;
+
+  case OP_ANYBYTE:
+  detect_partial_match(common, backtracks);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  return cc;
+
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+  case OP_NOTPROP:
+  case OP_PROP:
+  propdata[0] = XCL_HASPROP;
+  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
+  propdata[2] = cc[0];
+  propdata[3] = cc[1];
+  propdata[4] = XCL_END;
+  compile_xclass_matchingpath(common, propdata, backtracks);
+  return cc + 2;
+#endif
+#endif
+
+  case OP_ANYNL:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
+  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+  /* We don't need to handle the soft partial matching case. */
+  end_list = NULL;
+  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
+    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  else
+    check_str_end(common, &end_list);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  jump[2] = JUMP(SLJIT_JUMP);
+  JUMPHERE(jump[0]);
+  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
+  set_jumps(end_list, LABEL());
+  JUMPHERE(jump[1]);
+  JUMPHERE(jump[2]);
+  return cc;
+
+  case OP_NOT_HSPACE:
+  case OP_HSPACE:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
+  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+  return cc;
+
+  case OP_NOT_VSPACE:
+  case OP_VSPACE:
+  detect_partial_match(common, backtracks);
+  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
+  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+  return cc;
+
+#ifdef SUPPORT_UCP
+  case OP_EXTUNI:
+  detect_partial_match(common, backtracks);
+  read_char(common);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
+  /* Optimize register allocation: use a real register. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+
+  label = LABEL();
+  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
+  read_char(common);
+  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+
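+  /* ucp_gbtable[left] is a bitmask of the right-hand grapheme break
+  properties that do not break the cluster; the loop continues while
+  the bit of the current right-hand property is set. */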
+  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
+  OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
+  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+  JUMPTO(SLJIT_NOT_ZERO, label);
+
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+  JUMPHERE(jump[0]);
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+
+  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
+    {
+    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+    /* Since we successfully read a char above, partial matching must occur. */
+    check_partial(common, TRUE);
+    JUMPHERE(jump[0]);
+    }
+  return cc;
+#endif
+
+  case OP_EODN:
+  /* Requires rather complex checks. */
+  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    if (common->mode == JIT_COMPILE)
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
+    else
+      {
+      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
+      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
+      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
+      check_partial(common, TRUE);
+      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+      JUMPHERE(jump[1]);
+      }
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+    }
+  else if (common->nltype == NLTYPE_FIXED)
+    {
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
+    }
+  else
+    {
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+    jump[2] = JUMP(SLJIT_GREATER);
+    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
+    /* Equal. */
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+
+    JUMPHERE(jump[1]);
+    if (common->nltype == NLTYPE_ANYCRLF)
+      {
+      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
+      }
+    else
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
+      read_char_range(common, common->nlmin, common->nlmax, TRUE);
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+      }
+    JUMPHERE(jump[2]);
+    JUMPHERE(jump[3]);
+    }
+  JUMPHERE(jump[0]);
+  check_partial(common, FALSE);
+  return cc;
+
+  case OP_EOD:
+  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+  check_partial(common, FALSE);
+  return cc;
+
+  case OP_CIRC:
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+  return cc;
+
+  case OP_CIRCM:
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
+  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+  jump[0] = JUMP(SLJIT_JUMP);
+  JUMPHERE(jump[1]);
+
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+    }
+  else
+    {
+    skip_char_back(common);
+    read_char_range(common, common->nlmin, common->nlmax, TRUE);
+    check_newlinechar(common, common->nltype, backtracks, FALSE);
+    }
+  JUMPHERE(jump[0]);
+  return cc;
+
+  case OP_DOLL:
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+
+  if (!common->endonly)
+    compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
+  else
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+    check_partial(common, FALSE);
+    }
+  return cc;
+
+  case OP_DOLLM:
+  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+  check_partial(common, FALSE);
+  jump[0] = JUMP(SLJIT_JUMP);
+  JUMPHERE(jump[1]);
+
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
+    if (common->mode == JIT_COMPILE)
+      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
+    else
+      {
+      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
+      /* STR_PTR = STR_END - IN_UCHARS(1) */
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+      check_partial(common, TRUE);
+      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+      JUMPHERE(jump[1]);
+      }
+
+    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
+    }
+  else
+    {
+    peek_char(common, common->nlmax);
+    check_newlinechar(common, common->nltype, backtracks, FALSE);
+    }
+  JUMPHERE(jump[0]);
+  return cc;
+
+  case OP_CHAR:
+  case OP_CHARI:
+  length = 1;
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
+#endif
+  if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
+    {
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
+    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
+
+    context.length = IN_UCHARS(length);
+    context.sourcereg = -1;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+    context.ucharptr = 0;
+#endif
+    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
+    }
+
+  detect_partial_match(common, backtracks);
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+    GETCHAR(c, cc);
+    }
+  else
+#endif
+    c = *cc;
+
+  if (type == OP_CHAR || !char_has_othercase(common, cc))
+    {
+    read_char_range(common, c, c, FALSE);
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+    return cc + length;
+    }
+  oc = char_othercase(common, c);
+  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
+  bit = c ^ oc;
+  if (is_powerof2(bit))
+    {
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
+    return cc + length;
+    }
+  jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
+  JUMPHERE(jump[0]);
+  return cc + length;
+
+  case OP_NOT:
+  case OP_NOTI:
+  detect_partial_match(common, backtracks);
+  length = 1;
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+#ifdef COMPILE_PCRE8
+    c = *cc;
+    if (c < 128)
+      {
+      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+      if (type == OP_NOT || !char_has_othercase(common, cc))
+        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+      else
+        {
+        /* Since the UTF-8 encoding of characters below 128 is fixed,
+        we know that c is in the [a-z] or [A-Z] range. */
+        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
+        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
+        }
+      /* Skip the variable-length character. */
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+      JUMPHERE(jump[0]);
+      return cc + 1;
+      }
+    else
+#endif /* COMPILE_PCRE8 */
+      {
+      GETCHARLEN(c, cc, length);
+      }
+    }
+  else
+#endif /* SUPPORT_UTF */
+    c = *cc;
+
+  if (type == OP_NOT || !char_has_othercase(common, cc))
+    {
+    read_char_range(common, c, c, TRUE);
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+    }
+  else
+    {
+    oc = char_othercase(common, c);
+    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
+    bit = c ^ oc;
+    if (is_powerof2(bit))
+      {
+      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
+      }
+    else
+      {
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
+      }
+    }
+  return cc + length;
+
+  case OP_CLASS:
+  case OP_NCLASS:
+  detect_partial_match(common, backtracks);
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
+  read_char_range(common, 0, bit, type == OP_NCLASS);
+#else
+  read_char_range(common, 0, 255, type == OP_NCLASS);
+#endif
+
+  if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
+    return cc + 32 / sizeof(pcre_uchar);
+
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+  jump[0] = NULL;
+  if (common->utf)
+    {
+    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
+    if (type == OP_CLASS)
+      {
+      add_jump(compiler, backtracks, jump[0]);
+      jump[0] = NULL;
+      }
+    }
+#elif !defined COMPILE_PCRE8
+  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+  if (type == OP_CLASS)
+    {
+    add_jump(compiler, backtracks, jump[0]);
+    jump[0] = NULL;
+    }
+#endif /* SUPPORT_UTF && COMPILE_PCRE8 */
+
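+  /* Test the class bitmap: bit (c & 7) of byte (c >> 3) in the 32-byte
+  bitmap at cc tells whether the character matches; backtrack if the bit
+  is not set. */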
+  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+  if (jump[0] != NULL)
+    JUMPHERE(jump[0]);
+#endif
+
+  return cc + 32 / sizeof(pcre_uchar);
+
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+  case OP_XCLASS:
+  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
+  return cc + GET(cc, 0) - 1;
+#endif
+
+  case OP_REVERSE:
+  length = GET(cc, 0);
+  if (length == 0)
+    return cc + LINK_SIZE;
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+#ifdef SUPPORT_UTF
+  if (common->utf)
+    {
+    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
+    label = LABEL();
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
+    skip_char_back(common);
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, label);
+    }
+  else
+#endif
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
+    }
+  check_start_used_ptr(common);
+  return cc + LINK_SIZE;
+  }
+SLJIT_ASSERT_STOP();
+return cc;
+}
+
+static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
+{
+/* This function consumes at least one input character. */
+/* To decrease the number of length checks, we try to concatenate the fixed-length character sequences. */
+DEFINE_COMPILER;
+pcre_uchar *ccbegin = cc;
+compare_context context;
+int size;
+
+context.length = 0;
+do
+  {
+  if (cc >= ccend)
+    break;
+
+  if (*cc == OP_CHAR)
+    {
+    size = 1;
+#ifdef SUPPORT_UTF
+    if (common->utf && HAS_EXTRALEN(cc[1]))
+      size += GET_EXTRALEN(cc[1]);
+#endif
+    }
+  else if (*cc == OP_CHARI)
+    {
+    size = 1;
+#ifdef SUPPORT_UTF
+    if (common->utf)
+      {
+      if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
+        size = 0;
+      else if (HAS_EXTRALEN(cc[1]))
+        size += GET_EXTRALEN(cc[1]);
+      }
+    else
+#endif
+    if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
+      size = 0;
+    }
+  else
+    size = 0;
+
+  cc += 1 + size;
+  context.length += IN_UCHARS(size);
+  }
+while (size > 0 && context.length <= 128);
+
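+/* context.length now holds the byte length of the collected run of
+fixed-length characters (the loop stops once it exceeds 128 bytes or a
+non-fixed-length opcode is found). */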
+cc = ccbegin;
+if (context.length > 0)
+  {
+  /* We have a fixed-length byte sequence. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
+
+  context.sourcereg = -1;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  context.ucharptr = 0;
+#endif
+  do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
+  return cc;
+  }
+
+/* A non-fixed-length character is checked when length == 0. */
+return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
+}
+
+/* Forward definitions. */
+static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
+static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
+
+#define PUSH_BACKTRACK(size, ccstart, error) \
+  do \
+    { \
+    backtrack = sljit_alloc_memory(compiler, (size)); \
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
+      return error; \
+    memset(backtrack, 0, size); \
+    backtrack->prev = parent->top; \
+    backtrack->cc = (ccstart); \
+    parent->top = backtrack; \
+    } \
+  while (0)
+
+#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
+  do \
+    { \
+    backtrack = sljit_alloc_memory(compiler, (size)); \
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
+      return; \
+    memset(backtrack, 0, size); \
+    backtrack->prev = parent->top; \
+    backtrack->cc = (ccstart); \
+    parent->top = backtrack; \
+    } \
+  while (0)
+
+#define BACKTRACK_AS(type) ((type *)backtrack)
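+
+/* These macros allocate a zero-initialized backtrack record from the
+compiler's memory pool, link it into the chain rooted at parent->top, and
+return early if the allocator has failed. BACKTRACK_AS casts the generic
+record to its concrete type. */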
+
+static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
+{
+/* The OVECTOR offset goes to TMP2. */
+DEFINE_COMPILER;
+int count = GET2(cc, 1 + IMM2_SIZE);
+pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
+unsigned int offset;
+jump_list *found = NULL;
+
+SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
+
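+/* OVECTOR(1) holds the "string begin - 1" constant, the mark of an unset
+capture. Check all but the last slot of the name here; the last slot falls
+through below, optionally adding a backtrack if it is unset too. */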
+count--;
+while (count-- > 0)
+  {
+  offset = GET2(slot, 0) << 1;
+  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
+  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
+  slot += common->name_entry_size;
+  }
+
+offset = GET2(slot, 0) << 1;
+GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
+if (backtracks != NULL && !common->jscript_compat)
+  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
+
+set_jumps(found, LABEL());
+}
+
+static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
+{
+DEFINE_COMPILER;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
+int offset = 0;
+struct sljit_jump *jump = NULL;
+struct sljit_jump *partial;
+struct sljit_jump *nopartial;
+
+if (ref)
+  {
+  offset = GET2(cc, 1) << 1;
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+  /* OVECTOR(1) contains the "string begin - 1" constant. */
+  if (withchecks && !common->jscript_compat)
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+  }
+else
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
+if (common->utf && *cc == OP_REFI)
+  {
+  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
+  if (ref)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+  else
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+
+  if (withchecks)
+    jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
+
+  /* Needed to save important temporary registers. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
+  sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+  if (common->mode == JIT_COMPILE)
+    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
+  else
+    {
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
+    nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
+    check_partial(common, FALSE);
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+    JUMPHERE(nopartial);
+    }
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
+  }
+else
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
+  {
+  if (ref)
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
+  else
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+
+  if (withchecks)
+    jump = JUMP(SLJIT_ZERO);
+
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
+  if (common->mode == JIT_COMPILE)
+    add_jump(compiler, backtracks, partial);
+
+  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
+  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+
+  if (common->mode != JIT_COMPILE)
+    {
+    nopartial = JUMP(SLJIT_JUMP);
+    JUMPHERE(partial);
+    /* TMP2 -= STR_END - STR_PTR */
+    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
+    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
+    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
+    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
+    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+    JUMPHERE(partial);
+    check_partial(common, FALSE);
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+    JUMPHERE(nopartial);
+    }
+  }
+
+if (jump != NULL)
+  {
+  if (emptyfail)
+    add_jump(compiler, backtracks, jump);
+  else
+    JUMPHERE(jump);
+  }
+}
+
+static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
+backtrack_common *backtrack;
+pcre_uchar type;
+int offset = 0;
+struct sljit_label *label;
+struct sljit_jump *zerolength;
+struct sljit_jump *jump = NULL;
+pcre_uchar *ccbegin = cc;
+int min = 0, max = 0;
+BOOL minimize;
+
+PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+
+if (ref)
+  offset = GET2(cc, 1) << 1;
+else
+  cc += IMM2_SIZE;
+type = cc[1 + IMM2_SIZE];
+
+SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
+minimize = (type & 0x1) != 0;
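+/* Decode the iterator type into min/max bounds; max == 0 means there is no
+upper bound. */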
+switch(type)
+  {
+  case OP_CRSTAR:
+  case OP_CRMINSTAR:
+  min = 0;
+  max = 0;
+  cc += 1 + IMM2_SIZE + 1;
+  break;
+  case OP_CRPLUS:
+  case OP_CRMINPLUS:
+  min = 1;
+  max = 0;
+  cc += 1 + IMM2_SIZE + 1;
+  break;
+  case OP_CRQUERY:
+  case OP_CRMINQUERY:
+  min = 0;
+  max = 1;
+  cc += 1 + IMM2_SIZE + 1;
+  break;
+  case OP_CRRANGE:
+  case OP_CRMINRANGE:
+  min = GET2(cc, 1 + IMM2_SIZE + 1);
+  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
+  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
+  break;
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+
+if (!minimize)
+  {
+  if (min == 0)
+    {
+    allocate_stack(common, 2);
+    if (ref)
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
+    /* Temporary release of STR_PTR. */
+    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+    /* Handles both the invalid and the empty cases. Since the minimum
+    repeat is zero, the invalid case is basically the same as an empty case. */
+    if (ref)
+      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+    else
+      {
+      compile_dnref_search(common, ccbegin, NULL);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
+      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+      }
+    /* Restore if not zero length. */
+    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+    }
+  else
+    {
+    allocate_stack(common, 1);
+    if (ref)
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+    if (ref)
+      {
+      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+      }
+    else
+      {
+      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
+      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+      }
+    }
+
+  if (min > 1 || max > 1)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
+
+  label = LABEL();
+  if (!ref)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
+  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
+
+  if (min > 1 || max > 1)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
+    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
+    if (min > 1)
+      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
+    if (max > 1)
+      {
+      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
+      allocate_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      JUMPTO(SLJIT_JUMP, label);
+      JUMPHERE(jump);
+      }
+    }
+
+  if (max == 0)
+    {
+    /* Includes the min > 1 case as well. */
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    JUMPTO(SLJIT_JUMP, label);
+    }
+
+  JUMPHERE(zerolength);
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+
+  count_match(common);
+  return cc;
+  }
+
+allocate_stack(common, ref ? 2 : 3);
+if (ref)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+if (type != OP_CRMINSTAR)
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
+
+if (min == 0)
+  {
+  /* Handles both the invalid and the empty cases. Since the minimum
+  repeat is zero, the invalid case is basically the same as an empty case. */
+  if (ref)
+    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+  else
+    {
+    compile_dnref_search(common, ccbegin, NULL);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
+    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+    }
+  /* Length is non-zero, so we can match real repeats. */
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  jump = JUMP(SLJIT_JUMP);
+  }
+else
+  {
+  if (ref)
+    {
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+    }
+  else
+    {
+    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
+    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+    }
+  }
+
+BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+if (max > 0)
+  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
+
+if (!ref)
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+
+if (min > 1)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
+  }
+else if (max > 0)
+  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
+
+if (jump != NULL)
+  JUMPHERE(jump);
+JUMPHERE(zerolength);
+
+count_match(common);
+return cc;
+}
+
+static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+recurse_entry *entry = common->entries;
+recurse_entry *prev = NULL;
+sljit_sw start = GET(cc, 1);
+pcre_uchar *start_cc;
+BOOL needs_control_head;
+
+PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
+
+/* Inlining simple patterns. */
+if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
+  {
+  start_cc = common->start + start;
+  compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
+  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
+  return cc + 1 + LINK_SIZE;
+  }
+
+while (entry != NULL)
+  {
+  if (entry->start == start)
+    break;
+  prev = entry;
+  entry = entry->next;
+  }
+
+if (entry == NULL)
+  {
+  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return NULL;
+  entry->next = NULL;
+  entry->entry = NULL;
+  entry->calls = NULL;
+  entry->start = start;
+
+  if (prev != NULL)
+    prev->next = entry;
+  else
+    common->entries = entry;
+  }
+
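+/* Save the start-of-match pointer and/or the mark pointer on the stack,
+since the recursive call may change them. */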
+if (common->has_set_som && common->mark_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
+  allocate_stack(common, 2);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+  }
+else if (common->has_set_som || common->mark_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  }
+
+if (entry->entry == NULL)
+  add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
+else
+  JUMPTO(SLJIT_FAST_CALL, entry->entry);
+/* Leave if the match has failed. */
+add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
+return cc + 1 + LINK_SIZE;
+}
+
+static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
+{
+const pcre_uchar *begin = arguments->begin;
+int *offset_vector = arguments->offsets;
+int offset_count = arguments->offset_count;
+int i;
+
+if (PUBL(callout) == NULL)
+  return 0;
+
+callout_block->version = 2;
+callout_block->callout_data = arguments->callout_data;
+
+/* Offsets in subject. */
+callout_block->subject_length = arguments->end - arguments->begin;
+callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
+callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
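+/* The subject and offset_vector fields arrive here temporarily holding the
+match start and the current subject position; their real values are stored
+below. */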
+#if defined COMPILE_PCRE8
+callout_block->subject = (PCRE_SPTR)begin;
+#elif defined COMPILE_PCRE16
+callout_block->subject = (PCRE_SPTR16)begin;
+#elif defined COMPILE_PCRE32
+callout_block->subject = (PCRE_SPTR32)begin;
+#endif
+
+/* Convert and copy the JIT offset vector to the offset_vector array. */
+callout_block->capture_top = 0;
+callout_block->offset_vector = offset_vector;
+for (i = 2; i < offset_count; i += 2)
+  {
+  offset_vector[i] = jit_ovector[i] - begin;
+  offset_vector[i + 1] = jit_ovector[i + 1] - begin;
+  if (jit_ovector[i] >= begin)
+    callout_block->capture_top = i;
+  }
+
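+/* capture_top becomes the number of the highest captured pair plus one,
+and the first pair is marked unset since the match is not complete yet. */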
+callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
+if (offset_count > 0)
+  offset_vector[0] = -1;
+if (offset_count > 1)
+  offset_vector[1] = -1;
+return (*PUBL(callout))(callout_block);
+}
+
+/* Aligning to 8 bytes. */
+#define CALLOUT_ARG_SIZE \
+    (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
+
+#define CALLOUT_ARG_OFFSET(arg) \
+    (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
+
+static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+
+SLJIT_ASSERT(common->capture_last_ptr != 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
+
+/* These pointer-sized fields temporarily store internal variables. */
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
+
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
+
+/* Needed to save important temporary registers. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
+OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
+GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
+sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
+OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+
+/* Check return value. */
+OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
+if (common->forced_quit_label == NULL)
+  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
+else
+  JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
+return cc + 2 + 2 * LINK_SIZE;
+}
+
+#undef CALLOUT_ARG_SIZE
+#undef CALLOUT_ARG_OFFSET
+
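+/* Compiles positive and negative (lookahead or lookbehind) assertions,
+optionally preceded by OP_BRAZERO or OP_BRAMINZERO. */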
+static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
+{
+DEFINE_COMPILER;
+int framesize;
+int extrasize;
+BOOL needs_control_head;
+int private_data_ptr;
+backtrack_common altbacktrack;
+pcre_uchar *ccbegin;
+pcre_uchar opcode;
+pcre_uchar bra = OP_BRA;
+jump_list *tmp = NULL;
+jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
+jump_list **found;
+/* Saving previous accept variables. */
+BOOL save_local_exit = common->local_exit;
+BOOL save_positive_assert = common->positive_assert;
+then_trap_backtrack *save_then_trap = common->then_trap;
+struct sljit_label *save_quit_label = common->quit_label;
+struct sljit_label *save_accept_label = common->accept_label;
+jump_list *save_quit = common->quit;
+jump_list *save_positive_assert_quit = common->positive_assert_quit;
+jump_list *save_accept = common->accept;
+struct sljit_jump *jump;
+struct sljit_jump *brajump = NULL;
+
+/* An assertion captures any (*THEN) verbs inside it. */
+common->then_trap = NULL;
+
+if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
+  {
+  SLJIT_ASSERT(!conditional);
+  bra = *cc;
+  cc++;
+  }
+private_data_ptr = PRIVATE_DATA(cc);
+SLJIT_ASSERT(private_data_ptr != 0);
+framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
+backtrack->framesize = framesize;
+backtrack->private_data_ptr = private_data_ptr;
+opcode = *cc;
+SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
+found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
+ccbegin = cc;
+cc += GET(cc, 1);
+
+if (bra == OP_BRAMINZERO)
+  {
+  /* This is a braminzero backtrack path. */
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+  }
+
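+/* A negative framesize means that no stack frame needs to be saved; in the
+no_frame case the private data slot stores the current STACK_TOP instead. */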
+if (framesize < 0)
+  {
+  extrasize = needs_control_head ? 2 : 1;
+  if (framesize == no_frame)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
+  allocate_stack(common, extrasize);
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+    }
+  }
+else
+  {
+  extrasize = needs_control_head ? 3 : 2;
+  allocate_stack(common, framesize + extrasize);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+  OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
+    }
+  else
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
+  }
+
+memset(&altbacktrack, 0, sizeof(backtrack_common));
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+  {
+  /* A negative assert is stronger than a positive assert. */
+  common->local_exit = TRUE;
+  common->quit_label = NULL;
+  common->quit = NULL;
+  common->positive_assert = FALSE;
+  }
+else
+  common->positive_assert = TRUE;
+common->positive_assert_quit = NULL;
+
+while (1)
+  {
+  common->accept_label = NULL;
+  common->accept = NULL;
+  altbacktrack.top = NULL;
+  altbacktrack.topbacktracks = NULL;
+
+  if (*ccbegin == OP_ALT)
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  altbacktrack.cc = ccbegin;
+  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    {
+    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+      {
+      common->local_exit = save_local_exit;
+      common->quit_label = save_quit_label;
+      common->quit = save_quit;
+      }
+    common->positive_assert = save_positive_assert;
+    common->then_trap = save_then_trap;
+    common->accept_label = save_accept_label;
+    common->positive_assert_quit = save_positive_assert_quit;
+    common->accept = save_accept;
+    return NULL;
+    }
+  common->accept_label = LABEL();
+  if (common->accept != NULL)
+    set_jumps(common->accept, common->accept_label);
+
+  /* Reset stack. */
+  if (framesize < 0)
+    {
+    if (framesize == no_frame)
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+    else
+      free_stack(common, extrasize);
+    if (needs_control_head)
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+    }
+  else
+    {
+    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
+      {
+      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+      if (needs_control_head)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      if (needs_control_head)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      }
+    }
+
+  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+    {
+    /* We know that STR_PTR was stored on the top of the stack. */
+    if (conditional)
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
+    else if (bra == OP_BRAZERO)
+      {
+      if (framesize < 0)
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+      else
+        {
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
+        }
+      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else if (framesize >= 0)
+      {
+      /* For OP_BRA and OP_BRAMINZERO. */
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+      }
+    }
+  add_jump(compiler, found, JUMP(SLJIT_JUMP));
+
+  compile_backtrackingpath(common, altbacktrack.top);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    {
+    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+      {
+      common->local_exit = save_local_exit;
+      common->quit_label = save_quit_label;
+      common->quit = save_quit;
+      }
+    common->positive_assert = save_positive_assert;
+    common->then_trap = save_then_trap;
+    common->accept_label = save_accept_label;
+    common->positive_assert_quit = save_positive_assert_quit;
+    common->accept = save_accept;
+    return NULL;
+    }
+  set_jumps(altbacktrack.topbacktracks, LABEL());
+
+  if (*cc != OP_ALT)
+    break;
+
+  ccbegin = cc;
+  cc += GET(cc, 1);
+  }
+
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+  {
+  SLJIT_ASSERT(common->positive_assert_quit == NULL);
+  /* Makes the check below less complicated. */
+  common->positive_assert_quit = common->quit;
+  }
+
+/* None of them matched. */
+if (common->positive_assert_quit != NULL)
+  {
+  jump = JUMP(SLJIT_JUMP);
+  set_jumps(common->positive_assert_quit, LABEL());
+  SLJIT_ASSERT(framesize != no_stack);
+  if (framesize < 0)
+    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
+  else
+    {
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
+    }
+  JUMPHERE(jump);
+  }
+
+if (needs_control_head)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
+
+if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
+  {
+  /* The assert has failed. */
+  if (conditional || bra == OP_BRAZERO)
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  if (framesize < 0)
+    {
+    /* The topmost item should be 0. */
+    if (bra == OP_BRAZERO)
+      {
+      if (extrasize == 2)
+        free_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, extrasize);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
+    /* The topmost item should be 0. */
+    if (bra == OP_BRAZERO)
+      {
+      free_stack(common, framesize + extrasize - 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, framesize + extrasize);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
+    }
+  jump = JUMP(SLJIT_JUMP);
+  if (bra != OP_BRAZERO)
+    add_jump(compiler, target, jump);
+
+  /* The assert has succeeded. */
+  set_jumps(tmp, LABEL());
+  if (framesize < 0)
+    {
+    /* We know that STR_PTR was stored on the top of the stack. */
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+    /* Keep the STR_PTR on the top of the stack. */
+    if (bra == OP_BRAZERO)
+      {
+      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+      if (extrasize == 2)
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      }
+    else if (bra == OP_BRAMINZERO)
+      {
+      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    }
+  else
+    {
+    if (bra == OP_BRA)
+      {
+      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
+      }
+    else
+      {
+      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
+      if (extrasize == 2)
+        {
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+        if (bra == OP_BRAMINZERO)
+          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+        }
+      else
+        {
+        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
+        }
+      }
+    }
+
+  if (bra == OP_BRAZERO)
+    {
+    backtrack->matchingpath = LABEL();
+    SET_LABEL(jump, backtrack->matchingpath);
+    }
+  else if (bra == OP_BRAMINZERO)
+    {
+    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
+    JUMPHERE(brajump);
+    if (framesize >= 0)
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+      }
+    set_jumps(backtrack->common.topbacktracks, LABEL());
+    }
+  }
+else
+  {
+  /* The negative assert has succeeded. */
+  if (framesize < 0)
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    if (bra != OP_BRA)
+      {
+      if (extrasize == 2)
+        free_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, extrasize);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
+    /* The topmost item should be 0. */
+    if (bra != OP_BRA)
+      {
+      free_stack(common, framesize + extrasize - 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    else
+      free_stack(common, framesize + extrasize);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
+    }
+
+  if (bra == OP_BRAZERO)
+    backtrack->matchingpath = LABEL();
+  else if (bra == OP_BRAMINZERO)
+    {
+    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
+    JUMPHERE(brajump);
+    }
+
+  if (bra != OP_BRA)
+    {
+    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
+    set_jumps(backtrack->common.topbacktracks, LABEL());
+    backtrack->common.topbacktracks = NULL;
+    }
+  }
+
+if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+  {
+  common->local_exit = save_local_exit;
+  common->quit_label = save_quit_label;
+  common->quit = save_quit;
+  }
+common->positive_assert = save_positive_assert;
+common->then_trap = save_then_trap;
+common->accept_label = save_accept_label;
+common->positive_assert_quit = save_positive_assert_quit;
+common->accept = save_accept;
+return cc + 1 + LINK_SIZE;
+}
+
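+/* Restores the stack (and the control head when needed) after an atomic
+bracket has matched; for OP_KETRMAX the saved STR_PTR is loaded into TMP2. */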
+static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
+{
+DEFINE_COMPILER;
+int stacksize;
+
+if (framesize < 0)
+  {
+  if (framesize == no_frame)
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+  else
+    {
+    stacksize = needs_control_head ? 1 : 0;
+    if (ket != OP_KET || has_alternatives)
+      stacksize++;
+    free_stack(common, stacksize);
+    }
+
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
+
+  /* TMP2, which is set here, is used by OP_KETRMAX below. */
+  if (ket == OP_KETRMAX)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
+  else if (ket == OP_KETRMIN)
+    {
+    /* Move the STR_PTR to the private_data_ptr. */
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
+    }
+  }
+else
+  {
+  stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
+  OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
+
+  if (ket == OP_KETRMAX)
+    {
+    /* TMP2, which is set here, is used by OP_KETRMAX below. */
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    }
+  }
+if (needs_control_head)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
+}
+
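+/* Saves the previous capture_last and OVECTOR values on the stack and
+records the new capture boundaries; returns the updated stacksize. */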
+static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
+{
+DEFINE_COMPILER;
+
+if (common->capture_last_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+  stacksize++;
+  }
+if (common->optimized_cbracket[offset >> 1] == 0)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+  stacksize += 2;
+  }
+return stacksize;
+}
+
+/*
+  Handling bracketed expressions is probably the most complex part.
+
+  Stack layout naming characters:
+    S - Push the current STR_PTR
+    0 - Push a 0 (NULL)
+    A - Push the current STR_PTR. Needed for restoring the STR_PTR
+        before the next alternative. Not pushed if there are no alternatives.
+    M - Any values pushed by the current alternative. Can be empty, or anything.
+    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
+    L - Push the previous local (pointed to by localptr) to the stack
+   () - optional values stored on the stack
+  ()* - optional, can be stored multiple times
+
+  The following list shows the regular expression templates, their PCRE byte codes
+  and stack layout supported by pcre-sljit.
+
+  (?:)                     OP_BRA     | OP_KET                A M
+  ()                       OP_CBRA    | OP_KET                C M
+  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
+                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
+  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
+                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
+  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
+                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
+  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
+                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
+  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
+  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
+  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
+  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
+  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
+           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
+  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
+           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
+  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
+           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
+  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
+           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
+
+
+  Stack layout naming characters:
+    A - Push the alternative index (starting from 0) on the stack.
+        Not pushed if there are no alternatives.
+    M - Any values pushed by the current alternative. Can be empty, or anything.
+
+  The next list shows the possible content of a bracket:
+  (|)     OP_*BRA    | OP_ALT ...         M A
+  (?()|)  OP_*COND   | OP_ALT             M A
+  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
+  (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
+                                          Or nothing, if trace is unnecessary
+*/
+
+static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode;
+int private_data_ptr = 0;
+int offset = 0;
+int i, stacksize;
+int repeat_ptr = 0, repeat_length = 0;
+int repeat_type = 0, repeat_count = 0;
+pcre_uchar *ccbegin;
+pcre_uchar *matchingpath;
+pcre_uchar *slot;
+pcre_uchar bra = OP_BRA;
+pcre_uchar ket;
+assert_backtrack *assert;
+BOOL has_alternatives;
+BOOL needs_control_head = FALSE;
+struct sljit_jump *jump;
+struct sljit_jump *skip;
+struct sljit_label *rmax_label = NULL;
+struct sljit_jump *braminzero = NULL;
+
+PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
+
+if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
+  {
+  bra = *cc;
+  cc++;
+  opcode = *cc;
+  }
+
+opcode = *cc;
+ccbegin = cc;
+matchingpath = bracketend(cc) - 1 - LINK_SIZE;
+ket = *matchingpath;
+if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
+  {
+  repeat_ptr = PRIVATE_DATA(matchingpath);
+  repeat_length = PRIVATE_DATA(matchingpath + 1);
+  repeat_type = PRIVATE_DATA(matchingpath + 2);
+  repeat_count = PRIVATE_DATA(matchingpath + 3);
+  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
+  if (repeat_type == OP_UPTO)
+    ket = OP_KETRMAX;
+  if (repeat_type == OP_MINUPTO)
+    ket = OP_KETRMIN;
+  }
+
+if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
+  {
+  /* Drop this bracket_backtrack. */
+  parent->top = backtrack->prev;
+  return matchingpath + 1 + LINK_SIZE + repeat_length;
+  }
+
+matchingpath = ccbegin + 1 + LINK_SIZE;
+SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
+SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
+cc += GET(cc, 1);
+
+has_alternatives = *cc == OP_ALT;
+if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
+  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
+
+if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
+  opcode = OP_SCOND;
+if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
+  opcode = OP_ONCE;
+
+if (opcode == OP_CBRA || opcode == OP_SCBRA)
+  {
+  /* Capturing brackets have pre-allocated space. */
+  offset = GET2(ccbegin, 1 + LINK_SIZE);
+  if (common->optimized_cbracket[offset] == 0)
+    {
+    private_data_ptr = OVECTOR_PRIV(offset);
+    offset <<= 1;
+    }
+  else
+    {
+    offset <<= 1;
+    private_data_ptr = OVECTOR(offset);
+    }
+  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
+  matchingpath += IMM2_SIZE;
+  }
+else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
+  {
+  /* Other brackets simply allocate the next entry. */
+  private_data_ptr = PRIVATE_DATA(ccbegin);
+  SLJIT_ASSERT(private_data_ptr != 0);
+  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
+  if (opcode == OP_ONCE)
+    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
+  }
+
+/* Instructions before the first alternative. */
+stacksize = 0;
+if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
+  stacksize++;
+if (bra == OP_BRAZERO)
+  stacksize++;
+
+if (stacksize > 0)
+  allocate_stack(common, stacksize);
+
+stacksize = 0;
+if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+  stacksize++;
+  }
+
+if (bra == OP_BRAZERO)
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+
+if (bra == OP_BRAMINZERO)
+  {
+  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches the empty string) */
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  if (ket != OP_KETRMIN)
+    {
+    free_stack(common, 1);
+    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+    }
+  else
+    {
+    if (opcode == OP_ONCE || opcode >= OP_SBRA)
+      {
+      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+      /* Nothing stored during the first run. */
+      skip = JUMP(SLJIT_JUMP);
+      JUMPHERE(jump);
+      /* Checking zero-length iteration. */
+      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
+        {
+        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
+        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+        }
+      else
+        {
+        /* Except when the whole stack frame must be saved. */
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
+        }
+      JUMPHERE(skip);
+      }
+    else
+      {
+      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+      JUMPHERE(jump);
+      }
+    }
+  }
+
+if (repeat_type != 0)
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
+  if (repeat_type == OP_EXACT)
+    rmax_label = LABEL();
+  }
+
+if (ket == OP_KETRMIN)
+  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
+
+if (ket == OP_KETRMAX)
+  {
+  rmax_label = LABEL();
+  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
+    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
+  }
+
+/* Handling capturing brackets and alternatives. */
+if (opcode == OP_ONCE)
+  {
+  stacksize = 0;
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+    stacksize++;
+    }
+
+  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
+    {
+    /* Neither capturing brackets nor recursions are found in the block. */
+    if (ket == OP_KETRMIN)
+      {
+      stacksize += 2;
+      if (!needs_control_head)
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      }
+    else
+      {
+      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
+      if (ket == OP_KETRMAX || has_alternatives)
+        stacksize++;
+      }
+
+    if (stacksize > 0)
+      allocate_stack(common, stacksize);
+
+    stacksize = 0;
+    if (needs_control_head)
+      {
+      stacksize++;
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+      }
+
+    if (ket == OP_KETRMIN)
+      {
+      if (needs_control_head)
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
+        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
+      }
+    else if (ket == OP_KETRMAX || has_alternatives)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+    }
+  else
+    {
+    if (ket != OP_KET || has_alternatives)
+      stacksize++;
+
+    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
+    allocate_stack(common, stacksize);
+
+    if (needs_control_head)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+    OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+
+    stacksize = needs_control_head ? 1 : 0;
+    if (ket != OP_KET || has_alternatives)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
+      stacksize++;
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
+      }
+    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
+    }
+  }
+else if (opcode == OP_CBRA || opcode == OP_SCBRA)
+  {
+  /* Saving the previous values. */
+  if (common->optimized_cbracket[offset >> 1] != 0)
+    {
+    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
+    allocate_stack(common, 2);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+    }
+  }
+else if (opcode == OP_SBRA || opcode == OP_SCOND)
+  {
+  /* Saving the previous value. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  }
+else if (has_alternatives)
+  {
+  /* Pushing the starting string pointer. */
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  }
+
+/* Generating code for the first alternative. */
+if (opcode == OP_COND || opcode == OP_SCOND)
+  {
+  if (*matchingpath == OP_CREF)
+    {
+    SLJIT_ASSERT(has_alternatives);
+    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
+      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+    matchingpath += 1 + IMM2_SIZE;
+    }
+  else if (*matchingpath == OP_DNCREF)
+    {
+    SLJIT_ASSERT(has_alternatives);
+
+    i = GET2(matchingpath, 1 + IMM2_SIZE);
+    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
+    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+    slot += common->name_entry_size;
+    i--;
+    while (i-- > 0)
+      {
+      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+      OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
+      slot += common->name_entry_size;
+      }
+    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
+    matchingpath += 1 + 2 * IMM2_SIZE;
+    }
+  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
+    {
+    /* These conditions are decided at compile time, so there is never an other case. */
+    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
+    SLJIT_ASSERT(!has_alternatives);
+
+    if (*matchingpath == OP_FAIL)
+      stacksize = 0;
+    if (*matchingpath == OP_RREF)
+      {
+      stacksize = GET2(matchingpath, 1);
+      if (common->currententry == NULL)
+        stacksize = 0;
+      else if (stacksize == RREF_ANY)
+        stacksize = 1;
+      else if (common->currententry->start == 0)
+        stacksize = stacksize == 0;
+      else
+        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
+
+      if (stacksize != 0)
+        matchingpath += 1 + IMM2_SIZE;
+      }
+    else
+      {
+      if (common->currententry == NULL || common->currententry->start == 0)
+        stacksize = 0;
+      else
+        {
+        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
+        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
+        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
+        while (stacksize > 0)
+          {
+          if ((int)GET2(slot, 0) == i)
+            break;
+          slot += common->name_entry_size;
+          stacksize--;
+          }
+        }
+
+      if (stacksize != 0)
+        matchingpath += 1 + 2 * IMM2_SIZE;
+      }
+
+      /* stacksize == 0 is the common "else" case. */
+      if (stacksize == 0)
+        {
+        if (*cc == OP_ALT)
+          {
+          matchingpath = cc + 1 + LINK_SIZE;
+          cc += GET(cc, 1);
+          }
+        else
+          matchingpath = cc;
+        }
+    }
+  else
+    {
+    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
+    /* Similar code to the PUSH_BACKTRACK macro. */
+    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+      return NULL;
+    memset(assert, 0, sizeof(assert_backtrack));
+    assert->common.cc = matchingpath;
+    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
+    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
+    }
+  }
+
+compile_matchingpath(common, matchingpath, cc, backtrack);
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  return NULL;
+
+if (opcode == OP_ONCE)
+  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
+
+stacksize = 0;
+if (repeat_type == OP_MINUPTO)
+  {
+  /* We need to preserve the counter. TMP2 will be used below. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
+  stacksize++;
+  }
+if (ket != OP_KET || bra != OP_BRA)
+  stacksize++;
+if (offset != 0)
+  {
+  if (common->capture_last_ptr != 0)
+    stacksize++;
+  if (common->optimized_cbracket[offset >> 1] == 0)
+    stacksize += 2;
+  }
+if (has_alternatives && opcode != OP_ONCE)
+  stacksize++;
+
+if (stacksize > 0)
+  allocate_stack(common, stacksize);
+
+stacksize = 0;
+if (repeat_type == OP_MINUPTO)
+  {
+  /* TMP2 was set above. */
+  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
+  stacksize++;
+  }
+
+if (ket != OP_KET || bra != OP_BRA)
+  {
+  if (ket != OP_KET)
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+  else
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+  stacksize++;
+  }
+
+if (offset != 0)
+  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
+
+if (has_alternatives)
+  {
+  if (opcode != OP_ONCE)
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+  if (ket != OP_KETRMAX)
+    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
+  }
+
+/* Must be after the matchingpath label. */
+if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
+  {
+  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+  }
+
+if (ket == OP_KETRMAX)
+  {
+  if (repeat_type != 0)
+    {
+    if (has_alternatives)
+      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
+    /* Drop STR_PTR for greedy plus quantifier. */
+    if (opcode != OP_ONCE)
+      free_stack(common, 1);
+    }
+  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
+    {
+    if (has_alternatives)
+      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
+    /* Checking zero-length iteration. */
+    if (opcode != OP_ONCE)
+      {
+      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
+      /* Drop STR_PTR for greedy plus quantifier. */
+      if (bra != OP_BRAZERO)
+        free_stack(common, 1);
+      }
+    else
+      /* TMP2 must contain the starting STR_PTR. */
+      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
+    }
+  else
+    JUMPTO(SLJIT_JUMP, rmax_label);
+  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
+  }
+
+if (repeat_type == OP_EXACT)
+  {
+  count_match(common);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
+  }
+else if (repeat_type == OP_UPTO)
+  {
+  /* We need to preserve the counter. */
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+  }
+
+if (bra == OP_BRAZERO)
+  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
+
+if (bra == OP_BRAMINZERO)
+  {
+  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
+  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
+  if (braminzero != NULL)
+    {
+    JUMPHERE(braminzero);
+    /* We need to release the end pointer to perform the
+    backtrack for the zero-length iteration. When
+    framesize is < 0, OP_ONCE will do the release itself. */
+    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      }
+    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
+      free_stack(common, 1);
+    }
+  /* Continue to the normal backtrack. */
+  }
+
+if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
+  count_match(common);
+
+/* Skip the other alternatives. */
+while (*cc == OP_ALT)
+  cc += GET(cc, 1);
+cc += 1 + LINK_SIZE;
+
+/* Temporarily encode needs_control_head in the framesize field. */
+if (opcode == OP_ONCE)
+  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
+return cc + repeat_length;
+}
+
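+/* Compiles the matching path of possessive brackets (OP_BRAPOS and
+friends). Each alternative is tried in turn; after a successful match the
+saved frame is dropped, so the engine never backtracks into the bracket
+body again. */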
+static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode;
+int private_data_ptr;
+int cbraprivptr = 0;
+BOOL needs_control_head;
+int framesize;
+int stacksize;
+int offset = 0;
+BOOL zero = FALSE;
+pcre_uchar *ccbegin = NULL;
+int stack; /* Also contains the offset of control head. */
+struct sljit_label *loop = NULL;
+struct jump_list *emptymatch = NULL;
+
+PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
+if (*cc == OP_BRAPOSZERO)
+  {
+  zero = TRUE;
+  cc++;
+  }
+
+opcode = *cc;
+private_data_ptr = PRIVATE_DATA(cc);
+SLJIT_ASSERT(private_data_ptr != 0);
+BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
+switch(opcode)
+  {
+  case OP_BRAPOS:
+  case OP_SBRAPOS:
+  ccbegin = cc + 1 + LINK_SIZE;
+  break;
+
+  case OP_CBRAPOS:
+  case OP_SCBRAPOS:
+  offset = GET2(cc, 1 + LINK_SIZE);
+  /* This case cannot be optimized in the same way as
+  normal capturing brackets. */
+  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
+  cbraprivptr = OVECTOR_PRIV(offset);
+  offset <<= 1;
+  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+
+framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
+BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
+if (framesize < 0)
+  {
+  if (offset != 0)
+    {
+    stacksize = 2;
+    if (common->capture_last_ptr != 0)
+      stacksize++;
+    }
+  else
+    stacksize = 1;
+
+  if (needs_control_head)
+    stacksize++;
+  if (!zero)
+    stacksize++;
+
+  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
+  allocate_stack(common, stacksize);
+  if (framesize == no_frame)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
+
+  stack = 0;
+  if (offset != 0)
+    {
+    stack = 2;
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+    if (common->capture_last_ptr != 0)
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+    if (needs_control_head)
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+    if (common->capture_last_ptr != 0)
+      {
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
+      stack = 3;
+      }
+    }
+  else
+    {
+    if (needs_control_head)
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    stack = 1;
+    }
+
+  if (needs_control_head)
+    stack++;
+  if (!zero)
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
+  if (needs_control_head)
+    {
+    stack--;
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
+    }
+  }
+else
+  {
+  stacksize = framesize + 1;
+  if (!zero)
+    stacksize++;
+  if (needs_control_head)
+    stacksize++;
+  if (offset == 0)
+    stacksize++;
+  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
+
+  allocate_stack(common, stacksize);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+  if (needs_control_head)
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
+
+  stack = 0;
+  if (!zero)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
+    stack = 1;
+    }
+  if (needs_control_head)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
+    stack++;
+    }
+  if (offset == 0)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
+    stack++;
+    }
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
+  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
+  stack -= 1 + (offset == 0);
+  }
+
+if (offset != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
+
+loop = LABEL();
+while (*cc != OP_KETRPOS)
+  {
+  backtrack->top = NULL;
+  backtrack->topbacktracks = NULL;
+  cc += GET(cc, 1);
+
+  compile_matchingpath(common, ccbegin, cc, backtrack);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return NULL;
+
+  if (framesize < 0)
+    {
+    if (framesize == no_frame)
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+
+    if (offset != 0)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
+      if (common->capture_last_ptr != 0)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+      }
+    else
+      {
+      if (opcode == OP_SBRAPOS)
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      }
+
+    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
+      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
+
+    if (!zero)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
+    }
+  else
+    {
+    if (offset != 0)
+      {
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
+      if (common->capture_last_ptr != 0)
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+      if (opcode == OP_SBRAPOS)
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
+      }
+
+    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
+      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
+
+    if (!zero)
+      {
+      if (framesize < 0)
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
+      else
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+    }
+
+  if (needs_control_head)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
+
+  JUMPTO(SLJIT_JUMP, loop);
+  flush_stubs(common);
+
+  compile_backtrackingpath(common, backtrack->top);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return NULL;
+  set_jumps(backtrack->topbacktracks, LABEL());
+
+  if (framesize < 0)
+    {
+    if (offset != 0)
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
+    else
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    }
+  else
+    {
+    if (offset != 0)
+      {
+      /* Last alternative. */
+      if (*cc == OP_KETRPOS)
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
+      }
+    }
+
+  if (*cc == OP_KETRPOS)
+    break;
+  ccbegin = cc + 1 + LINK_SIZE;
+  }
+
+/* We don't have to restore the control head in case of a failed match. */
+
+backtrack->topbacktracks = NULL;
+if (!zero)
+  {
+  if (framesize < 0)
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
+  else /* TMP2 is set to [private_data_ptr] above. */
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
+  }
+
+/* None of them matched. */
+set_jumps(emptymatch, LABEL());
+count_match(common);
+return cc + 1 + LINK_SIZE;
+}
+
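+/* Decodes a single character iterator: normalizes the opcode to the
+OP_STAR family, reports the character type (OP_CHAR, OP_CHARI, OP_NOT,
+OP_NOTI, a character class, or a type opcode such as OP_DIGIT), extracts
+the minimum and maximum of bounded repeats, and sets *end to the first
+code unit after the iterator. */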
+static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
+{
+int class_len;
+
+*opcode = *cc;
+if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
+  {
+  cc++;
+  *type = OP_CHAR;
+  }
+else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
+  {
+  cc++;
+  *type = OP_CHARI;
+  *opcode -= OP_STARI - OP_STAR;
+  }
+else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
+  {
+  cc++;
+  *type = OP_NOT;
+  *opcode -= OP_NOTSTAR - OP_STAR;
+  }
+else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
+  {
+  cc++;
+  *type = OP_NOTI;
+  *opcode -= OP_NOTSTARI - OP_STAR;
+  }
+else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
+  {
+  cc++;
+  *opcode -= OP_TYPESTAR - OP_STAR;
+  *type = 0;
+  }
+else
+  {
+  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
+  *type = *opcode;
+  cc++;
+  class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
+  *opcode = cc[class_len - 1];
+  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
+    {
+    *opcode -= OP_CRSTAR - OP_STAR;
+    if (end != NULL)
+      *end = cc + class_len;
+    }
+  else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
+    {
+    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
+    if (end != NULL)
+      *end = cc + class_len;
+    }
+  else
+    {
+    SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
+    *max = GET2(cc, (class_len + IMM2_SIZE));
+    *min = GET2(cc, class_len);
+
+    if (*min == 0)
+      {
+      SLJIT_ASSERT(*max != 0);
+      *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
+      }
+    if (*max == *min)
+      *opcode = OP_EXACT;
+
+    if (end != NULL)
+      *end = cc + class_len + 2 * IMM2_SIZE;
+    }
+  return cc;
+  }
+
+if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
+  {
+  *max = GET2(cc, 0);
+  cc += IMM2_SIZE;
+  }
+
+if (*type == 0)
+  {
+  *type = *cc;
+  if (end != NULL)
+    *end = next_opcode(common, cc);
+  cc++;
+  return cc;
+  }
+
+if (end != NULL)
+  {
+  *end = cc + 1;
+#ifdef SUPPORT_UTF
+  if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
+#endif
+  }
+return cc;
+}
+
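+/* Compiles the matching path of single character iterators. Greedy
+iterators consume as many characters as possible and record the position
+data needed for backtracking; possessive iterators keep only the final
+position, so they have nothing to undo. */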
+static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode;
+pcre_uchar type;
+int max = -1, min = -1;
+pcre_uchar *end;
+jump_list *nomatch = NULL;
+struct sljit_jump *jump = NULL;
+struct sljit_label *label;
+int private_data_ptr = PRIVATE_DATA(cc);
+int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
+int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
+int tmp_base, tmp_offset;
+
+PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
+
+switch(type)
+  {
+  case OP_NOT_DIGIT:
+  case OP_DIGIT:
+  case OP_NOT_WHITESPACE:
+  case OP_WHITESPACE:
+  case OP_NOT_WORDCHAR:
+  case OP_WORDCHAR:
+  case OP_ANY:
+  case OP_ALLANY:
+  case OP_ANYBYTE:
+  case OP_ANYNL:
+  case OP_NOT_HSPACE:
+  case OP_HSPACE:
+  case OP_NOT_VSPACE:
+  case OP_VSPACE:
+  case OP_CHAR:
+  case OP_CHARI:
+  case OP_NOT:
+  case OP_NOTI:
+  case OP_CLASS:
+  case OP_NCLASS:
+  tmp_base = TMP3;
+  tmp_offset = 0;
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  /* Fall through. */
+
+  case OP_EXTUNI:
+  case OP_XCLASS:
+  case OP_NOTPROP:
+  case OP_PROP:
+  tmp_base = SLJIT_MEM1(SLJIT_SP);
+  tmp_offset = POSSESSIVE0;
+  break;
+  }
+
+switch(opcode)
+  {
+  case OP_STAR:
+  case OP_PLUS:
+  case OP_UPTO:
+  case OP_CRRANGE:
+  if (type == OP_ANYNL || type == OP_EXTUNI)
+    {
+    SLJIT_ASSERT(private_data_ptr == 0);
+    if (opcode == OP_STAR || opcode == OP_UPTO)
+      {
+      allocate_stack(common, 2);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
+      }
+    else
+      {
+      allocate_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      }
+
+    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
+
+    label = LABEL();
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+    if (opcode == OP_UPTO || opcode == OP_CRRANGE)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
+      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+      if (opcode == OP_CRRANGE && min > 0)
+        CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
+      if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
+        jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
+      }
+
+    /* We cannot use TMP3 because of this allocate_stack. */
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+    JUMPTO(SLJIT_JUMP, label);
+    if (jump != NULL)
+      JUMPHERE(jump);
+    }
+  else
+    {
+    if (opcode == OP_PLUS)
+      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+    if (private_data_ptr == 0)
+      allocate_stack(common, 2);
+    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+    if (opcode <= OP_PLUS)
+      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
+    else
+      OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
+    label = LABEL();
+    compile_char1_matchingpath(common, type, cc, &nomatch);
+    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+    if (opcode <= OP_PLUS)
+      JUMPTO(SLJIT_JUMP, label);
+    else if (opcode == OP_CRRANGE && max == 0)
+      {
+      OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
+      JUMPTO(SLJIT_JUMP, label);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
+      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+      OP1(SLJIT_MOV, base, offset1, TMP1, 0);
+      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
+      }
+    set_jumps(nomatch, LABEL());
+    if (opcode == OP_CRRANGE)
+      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
+    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+    }
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_MINSTAR:
+  case OP_MINPLUS:
+  if (opcode == OP_MINPLUS)
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  if (private_data_ptr == 0)
+    allocate_stack(common, 1);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_MINUPTO:
+  case OP_CRMINRANGE:
+  if (private_data_ptr == 0)
+    allocate_stack(common, 2);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
+  if (opcode == OP_CRMINRANGE)
+    add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_QUERY:
+  case OP_MINQUERY:
+  if (private_data_ptr == 0)
+    allocate_stack(common, 1);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  if (opcode == OP_QUERY)
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+  break;
+
+  case OP_EXACT:
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_NOT_ZERO, label);
+  break;
+
+  case OP_POSSTAR:
+  case OP_POSPLUS:
+  case OP_POSUPTO:
+  if (opcode == OP_POSPLUS)
+    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  if (opcode == OP_POSUPTO)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &nomatch);
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  if (opcode != OP_POSUPTO)
+    JUMPTO(SLJIT_JUMP, label);
+  else
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, label);
+    }
+  set_jumps(nomatch, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+  break;
+
+  case OP_POSQUERY:
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  compile_char1_matchingpath(common, type, cc, &nomatch);
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  set_jumps(nomatch, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+  break;
+
+  case OP_CRPOSRANGE:
+  /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO. */
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+  JUMPTO(SLJIT_NOT_ZERO, label);
+
+  if (max != 0)
+    {
+    SLJIT_ASSERT(max - min > 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
+    }
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  label = LABEL();
+  compile_char1_matchingpath(common, type, cc, &nomatch);
+  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+  if (max == 0)
+    JUMPTO(SLJIT_JUMP, label);
+  else
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
+    JUMPTO(SLJIT_NOT_ZERO, label);
+    }
+  set_jumps(nomatch, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+
+count_match(common);
+return end;
+}
+
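+/* OP_FAIL forces an immediate backtrack. OP_ACCEPT succeeds right away
+unless an empty match must be rejected because of the notempty or
+notempty_atstart runtime options. */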
+static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+if (*cc == OP_FAIL)
+  {
+  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
+  return cc + 1;
+  }
+
+if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
+  {
+  /* No need to check notempty conditions. */
+  if (common->accept_label == NULL)
+    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
+  else
+    JUMPTO(SLJIT_JUMP, common->accept_label);
+  return cc + 1;
+  }
+
+if (common->accept_label == NULL)
+  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
+else
+  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
+add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
+if (common->accept_label == NULL)
+  add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+else
+  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+if (common->accept_label == NULL)
+  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
+else
+  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
+add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
+return cc + 1;
+}
+
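+/* OP_CLOSE: stores the current subject pointer as the end of the
+capturing bracket whose number follows the opcode. Inside a recursion
+the captured data is discarded, so nothing needs to be stored. */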
+static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
+{
+DEFINE_COMPILER;
+int offset = GET2(cc, 1);
+BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
+
+/* Data will be discarded anyway... */
+if (common->currententry != NULL)
+  return cc + 1 + IMM2_SIZE;
+
+if (!optimized_cbracket)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
+offset <<= 1;
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+if (!optimized_cbracket)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+return cc + 1 + IMM2_SIZE;
+}
+
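+/* Compiles the matching path of backtracking control verbs. (*SKIP)
+saves the current position; (*PRUNE:NAME) and (*THEN:NAME) record NAME
+as the most recent mark. */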
+static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+pcre_uchar opcode = *cc;
+pcre_uchar *ccend = cc + 1;
+
+if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
+  ccend += 2 + cc[1];
+
+PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
+
+if (opcode == OP_SKIP)
+  {
+  allocate_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+  return ccend;
+  }
+
+if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
+  }
+
+return ccend;
+}
+
+static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
+
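+/* Pushes a "then trap" block onto the backtracking control chain so
+that a later (*THEN) can find the alternative which contains it. */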
+static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+BOOL needs_control_head;
+int size;
+
+PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
+common->then_trap = BACKTRACK_AS(then_trap_backtrack);
+BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
+BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
+BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
+
+size = BACKTRACK_AS(then_trap_backtrack)->framesize;
+size = 3 + (size < 0 ? 0 : size);
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+allocate_stack(common, size);
+if (size > 3)
+  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
+else
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
+
+size = BACKTRACK_AS(then_trap_backtrack)->framesize;
+if (size >= 0)
+  init_frame(common, cc, ccend, size - 1, 0, FALSE);
+}
+
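+/* The central dispatcher of the matching path compiler: walks the byte
+code from cc to ccend and invokes the opcode specific compilers above. */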
+static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack;
+BOOL has_then_trap = FALSE;
+then_trap_backtrack *save_then_trap = NULL;
+
+SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
+
+if (common->has_then && common->then_offsets[cc - common->start] != 0)
+  {
+  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
+  has_then_trap = TRUE;
+  save_then_trap = common->then_trap;
+  /* Tail item on the backtrack chain. */
+  compile_then_trap_matchingpath(common, cc, ccend, parent);
+  }
+
+while (cc < ccend)
+  {
+  switch(*cc)
+    {
+    case OP_SOD:
+    case OP_SOM:
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    case OP_NOT_DIGIT:
+    case OP_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+    case OP_ANYBYTE:
+    case OP_NOTPROP:
+    case OP_PROP:
+    case OP_ANYNL:
+    case OP_NOT_HSPACE:
+    case OP_HSPACE:
+    case OP_NOT_VSPACE:
+    case OP_VSPACE:
+    case OP_EXTUNI:
+    case OP_EODN:
+    case OP_EOD:
+    case OP_CIRC:
+    case OP_CIRCM:
+    case OP_DOLL:
+    case OP_DOLLM:
+    case OP_NOT:
+    case OP_NOTI:
+    case OP_REVERSE:
+    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    break;
+
+    case OP_SET_SOM:
+    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
+    allocate_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+    cc++;
+    break;
+
+    case OP_CHAR:
+    case OP_CHARI:
+    if (common->mode == JIT_COMPILE)
+      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    else
+      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    break;
+
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_EXACT:
+    case OP_POSSTAR:
+    case OP_POSPLUS:
+    case OP_POSQUERY:
+    case OP_POSUPTO:
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_EXACTI:
+    case OP_POSSTARI:
+    case OP_POSPLUSI:
+    case OP_POSQUERYI:
+    case OP_POSUPTOI:
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTEXACT:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSUPTO:
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTEXACTI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTPOSQUERYI:
+    case OP_NOTPOSUPTOI:
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEEXACT:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEPOSQUERY:
+    case OP_TYPEPOSUPTO:
+    cc = compile_iterator_matchingpath(common, cc, parent);
+    break;
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
+      cc = compile_iterator_matchingpath(common, cc, parent);
+    else
+      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    break;
+
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
+    case OP_XCLASS:
+    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
+      cc = compile_iterator_matchingpath(common, cc, parent);
+    else
+      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+    break;
+#endif
+
+    case OP_REF:
+    case OP_REFI:
+    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
+      cc = compile_ref_iterator_matchingpath(common, cc, parent);
+    else
+      {
+      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+      cc += 1 + IMM2_SIZE;
+      }
+    break;
+
+    case OP_DNREF:
+    case OP_DNREFI:
+    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
+      cc = compile_ref_iterator_matchingpath(common, cc, parent);
+    else
+      {
+      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+      cc += 1 + 2 * IMM2_SIZE;
+      }
+    break;
+
+    case OP_RECURSE:
+    cc = compile_recurse_matchingpath(common, cc, parent);
+    break;
+
+    case OP_CALLOUT:
+    cc = compile_callout_matchingpath(common, cc, parent);
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
+    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
+    break;
+
+    case OP_BRAMINZERO:
+    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
+    cc = bracketend(cc + 1);
+    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
+      {
+      allocate_stack(common, 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+      }
+    else
+      {
+      allocate_stack(common, 2);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
+      }
+    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
+    if (cc[1] > OP_ASSERTBACK_NOT)
+      count_match(common);
+    break;
+
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRA:
+    case OP_CBRA:
+    case OP_COND:
+    case OP_SBRA:
+    case OP_SCBRA:
+    case OP_SCOND:
+    cc = compile_bracket_matchingpath(common, cc, parent);
+    break;
+
+    case OP_BRAZERO:
+    if (cc[1] > OP_ASSERTBACK_NOT)
+      cc = compile_bracket_matchingpath(common, cc, parent);
+    else
+      {
+      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
+      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
+      }
+    break;
+
+    case OP_BRAPOS:
+    case OP_CBRAPOS:
+    case OP_SBRAPOS:
+    case OP_SCBRAPOS:
+    case OP_BRAPOSZERO:
+    cc = compile_bracketpos_matchingpath(common, cc, parent);
+    break;
+
+    case OP_MARK:
+    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
+    SLJIT_ASSERT(common->mark_ptr != 0);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+    allocate_stack(common, common->has_skip_arg ? 5 : 1);
+    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
+    if (common->has_skip_arg)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+      }
+    cc += 1 + 2 + cc[1];
+    break;
+
+    case OP_PRUNE:
+    case OP_PRUNE_ARG:
+    case OP_SKIP:
+    case OP_SKIP_ARG:
+    case OP_THEN:
+    case OP_THEN_ARG:
+    case OP_COMMIT:
+    cc = compile_control_verb_matchingpath(common, cc, parent);
+    break;
+
+    case OP_FAIL:
+    case OP_ACCEPT:
+    case OP_ASSERT_ACCEPT:
+    cc = compile_fail_accept_matchingpath(common, cc, parent);
+    break;
+
+    case OP_CLOSE:
+    cc = compile_close_matchingpath(common, cc);
+    break;
+
+    case OP_SKIPZERO:
+    cc = bracketend(cc + 1);
+    break;
+
+    default:
+    SLJIT_ASSERT_STOP();
+    return;
+    }
+  if (cc == NULL)
+    return;
+  }
+
+if (has_then_trap)
+  {
+  /* Head item on the backtrack chain. */
+  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
+  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
+  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
+  common->then_trap = save_then_trap;
+  }
+SLJIT_ASSERT(cc == ccend);
+}
+
+#undef PUSH_BACKTRACK
+#undef PUSH_BACKTRACK_NOVALUE
+#undef BACKTRACK_AS
+
+#define COMPILE_BACKTRACKINGPATH(current) \
+  do \
+    { \
+    compile_backtrackingpath(common, (current)); \
+    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
+      return; \
+    } \
+  while (0)
+
+#define CURRENT_AS(type) ((type *)current)
+
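+/* Compiles the backtracking path of single character iterators. Greedy
+iterators give back one character (or one saved position) at a time,
+minimal (lazy) ones try to match one more character before failing, and
+possessive iterators have nothing to undo. */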
+static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+pcre_uchar *cc = current->cc;
+pcre_uchar opcode;
+pcre_uchar type;
+int max = -1, min = -1;
+struct sljit_label *label = NULL;
+struct sljit_jump *jump = NULL;
+jump_list *jumplist = NULL;
+int private_data_ptr = PRIVATE_DATA(cc);
+int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
+int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
+
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
+
+switch(opcode)
+  {
+  case OP_STAR:
+  case OP_PLUS:
+  case OP_UPTO:
+  case OP_CRRANGE:
+  if (type == OP_ANYNL || type == OP_EXTUNI)
+    {
+    SLJIT_ASSERT(private_data_ptr == 0);
+    set_jumps(current->topbacktracks, LABEL());
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+    }
+  else
+    {
+    if (opcode == OP_UPTO)
+      min = 0;
+    if (opcode <= OP_PLUS)
+      {
+      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
+      }
+    else
+      {
+      OP1(SLJIT_MOV, TMP1, 0, base, offset1);
+      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+      jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
+      OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
+      }
+    skip_char_back(common);
+    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+    if (opcode == OP_CRRANGE)
+      set_jumps(current->topbacktracks, LABEL());
+    JUMPHERE(jump);
+    if (private_data_ptr == 0)
+      free_stack(common, 2);
+    if (opcode == OP_PLUS)
+      set_jumps(current->topbacktracks, LABEL());
+    }
+  break;
+
+  case OP_MINSTAR:
+  case OP_MINPLUS:
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  compile_char1_matchingpath(common, type, cc, &jumplist);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  set_jumps(jumplist, LABEL());
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  if (opcode == OP_MINPLUS)
+    set_jumps(current->topbacktracks, LABEL());
+  break;
+
+  case OP_MINUPTO:
+  case OP_CRMINRANGE:
+  if (opcode == OP_CRMINRANGE)
+    {
+    label = LABEL();
+    set_jumps(current->topbacktracks, label);
+    }
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  compile_char1_matchingpath(common, type, cc, &jumplist);
+
+  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
+  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
+
+  if (opcode == OP_CRMINRANGE)
+    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
+
+  if (opcode == OP_CRMINRANGE && max == 0)
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  else
+    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
+
+  set_jumps(jumplist, LABEL());
+  if (private_data_ptr == 0)
+    free_stack(common, 2);
+  break;
+
+  case OP_QUERY:
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
+  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+  jump = JUMP(SLJIT_JUMP);
+  set_jumps(current->topbacktracks, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  JUMPHERE(jump);
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  break;
+
+  case OP_MINQUERY:
+  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
+  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+  compile_char1_matchingpath(common, type, cc, &jumplist);
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+  set_jumps(jumplist, LABEL());
+  JUMPHERE(jump);
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  break;
+
+  case OP_EXACT:
+  case OP_POSPLUS:
+  case OP_CRPOSRANGE:
+  set_jumps(current->topbacktracks, LABEL());
+  break;
+
+  case OP_POSSTAR:
+  case OP_POSQUERY:
+  case OP_POSUPTO:
+  break;
+
+  default:
+  SLJIT_ASSERT_STOP();
+  break;
+  }
+}
+
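+/* Compiles the backtracking path of back reference iterators. The low
+bit of the quantifier opcode distinguishes the maximize (greedy) case
+from the minimize case. */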
+static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+pcre_uchar *cc = current->cc;
+BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
+pcre_uchar type;
+
+type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
+
+if ((type & 0x1) == 0)
+  {
+  /* Maximize case. */
+  set_jumps(current->topbacktracks, LABEL());
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+  return;
+  }
+
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+set_jumps(current->topbacktracks, LABEL());
+free_stack(common, ref ? 2 : 3);
+}
+
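+/* Compiles the backtracking path of recursions. Inlined patterns are
+backtracked directly; otherwise the saved start-of-match and mark values
+are restored from the stack. */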
+static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+
+if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
+  compile_backtrackingpath(common, current->top);
+set_jumps(current->topbacktracks, LABEL());
+if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
+  return;
+
+if (common->has_set_som && common->mark_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+  free_stack(common, 2);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
+  }
+else if (common->has_set_som || common->mark_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
+  }
+}
+
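+/* Compiles the backtracking path of assertions. When the matching path
+saved a private frame (framesize >= 0), positive assertions revert it
+before the failure is propagated; OP_BRAZERO adds a zero length match
+re-entry point. */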
+static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+pcre_uchar *cc = current->cc;
+pcre_uchar bra = OP_BRA;
+struct sljit_jump *brajump = NULL;
+
+SLJIT_ASSERT(*cc != OP_BRAMINZERO);
+if (*cc == OP_BRAZERO)
+  {
+  bra = *cc;
+  cc++;
+  }
+
+if (bra == OP_BRAZERO)
+  {
+  SLJIT_ASSERT(current->topbacktracks == NULL);
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  }
+
+if (CURRENT_AS(assert_backtrack)->framesize < 0)
+  {
+  set_jumps(current->topbacktracks, LABEL());
+
+  if (bra == OP_BRAZERO)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
+    free_stack(common, 1);
+    }
+  return;
+  }
+
+if (bra == OP_BRAZERO)
+  {
+  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
+    {
+    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
+    free_stack(common, 1);
+    return;
+    }
+  free_stack(common, 1);
+  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
+  }
+
+if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
+  {
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
+  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
+
+  set_jumps(current->topbacktracks, LABEL());
+  }
+else
+  set_jumps(current->topbacktracks, LABEL());
+
+if (bra == OP_BRAZERO)
+  {
+  /* We know there is enough space on the stack. */
+  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
+  JUMPHERE(brajump);
+  }
+}
+
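+/* Compiles the backtracking path of brackets: restores the saved
+capture and position data, then retries the remaining alternatives,
+selecting the next one by the index stored on the stack (a table jump
+is used when there are more than four alternatives). */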
+static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+int opcode, stacksize, alt_count, alt_max;
+int offset = 0;
+int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
+int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
+pcre_uchar *cc = current->cc;
+pcre_uchar *ccbegin;
+pcre_uchar *ccprev;
+pcre_uchar bra = OP_BRA;
+pcre_uchar ket;
+assert_backtrack *assert;
+sljit_uw *next_update_addr = NULL;
+BOOL has_alternatives;
+BOOL needs_control_head = FALSE;
+struct sljit_jump *brazero = NULL;
+struct sljit_jump *alt1 = NULL;
+struct sljit_jump *alt2 = NULL;
+struct sljit_jump *once = NULL;
+struct sljit_jump *cond = NULL;
+struct sljit_label *rmin_label = NULL;
+struct sljit_label *exact_label = NULL;
+
+if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
+  {
+  bra = *cc;
+  cc++;
+  }
+
+opcode = *cc;
+ccbegin = bracketend(cc) - 1 - LINK_SIZE;
+ket = *ccbegin;
+if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
+  {
+  repeat_ptr = PRIVATE_DATA(ccbegin);
+  repeat_type = PRIVATE_DATA(ccbegin + 2);
+  repeat_count = PRIVATE_DATA(ccbegin + 3);
+  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
+  if (repeat_type == OP_UPTO)
+    ket = OP_KETRMAX;
+  if (repeat_type == OP_MINUPTO)
+    ket = OP_KETRMIN;
+  }
+ccbegin = cc;
+cc += GET(cc, 1);
+has_alternatives = *cc == OP_ALT;
+if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
+  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
+if (opcode == OP_CBRA || opcode == OP_SCBRA)
+  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
+if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
+  opcode = OP_SCOND;
+if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
+  opcode = OP_ONCE;
+
+alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
+
+/* Decode needs_control_head from the framesize field. */
+if (opcode == OP_ONCE)
+  {
+  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
+  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
+  }
+
+if (ket != OP_KET && repeat_type != 0)
+  {
+  /* TMP1 is used in OP_KETRMIN below. */
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  if (repeat_type == OP_UPTO)
+    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
+  else
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
+  }
+
+if (ket == OP_KETRMAX)
+  {
+  if (bra == OP_BRAZERO)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
+    }
+  }
+else if (ket == OP_KETRMIN)
+  {
+  if (bra != OP_BRAMINZERO)
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    if (repeat_type != 0)
+      {
+      /* TMP1 was set a few lines above. */
+      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+      /* Drop STR_PTR for non-greedy plus quantifier. */
+      if (opcode != OP_ONCE)
+        free_stack(common, 1);
+      }
+    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
+      {
+      /* Checking zero-length iteration. */
+      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
+        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+      else
+        {
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+        }
+      /* Drop STR_PTR for non-greedy plus quantifier. */
+      if (opcode != OP_ONCE)
+        free_stack(common, 1);
+      }
+    else
+      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+    }
+  rmin_label = LABEL();
+  if (repeat_type != 0)
+    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+  }
+else if (bra == OP_BRAZERO)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
+  }
+else if (repeat_type == OP_EXACT)
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+  exact_label = LABEL();
+  }
+
+if (offset != 0)
+  {
+  if (common->capture_last_ptr != 0)
+    {
+    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+    free_stack(common, 3);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
+    }
+  else if (common->optimized_cbracket[offset >> 1] == 0)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    free_stack(common, 2);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
+    }
+  }
+
+if (SLJIT_UNLIKELY(opcode == OP_ONCE))
+  {
+  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
+    {
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+    }
+  once = JUMP(SLJIT_JUMP);
+  }
+else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
+  {
+  if (has_alternatives)
+    {
+    /* Always exactly one alternative. */
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+
+    alt_max = 2;
+    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
+    }
+  }
+else if (has_alternatives)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+
+  if (alt_max > 4)
+    {
+    /* Table jump if alt_max is greater than 4. */
+    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
+    if (SLJIT_UNLIKELY(next_update_addr == NULL))
+      return;
+    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
+    add_label_addr(common, next_update_addr++);
+    }
+  else
+    {
+    if (alt_max == 4)
+      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
+    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
+    }
+  }
+
+COMPILE_BACKTRACKINGPATH(current->top);
+if (current->topbacktracks)
+  set_jumps(current->topbacktracks, LABEL());
+
+if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
+  {
+  /* A conditional block always has at most one alternative. */
+  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
+    {
+    SLJIT_ASSERT(has_alternatives);
+    assert = CURRENT_AS(bracket_backtrack)->u.assert;
+    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+      }
+    cond = JUMP(SLJIT_JUMP);
+    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
+    }
+  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
+    {
+    SLJIT_ASSERT(has_alternatives);
+    cond = JUMP(SLJIT_JUMP);
+    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
+    }
+  else
+    SLJIT_ASSERT(!has_alternatives);
+  }
+
+if (has_alternatives)
+  {
+  alt_count = sizeof(sljit_uw);
+  do
+    {
+    current->top = NULL;
+    current->topbacktracks = NULL;
+    current->nextbacktracks = NULL;
+    /* Conditional blocks always have an additional alternative, even if it is empty. */
+    if (*cc == OP_ALT)
+      {
+      ccprev = cc + 1 + LINK_SIZE;
+      cc += GET(cc, 1);
+      if (opcode != OP_COND && opcode != OP_SCOND)
+        {
+        if (opcode != OP_ONCE)
+          {
+          if (private_data_ptr != 0)
+            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+          else
+            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+          }
+        else
+          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
+        }
+      compile_matchingpath(common, ccprev, cc, current);
+      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+        return;
+      }
+
+    /* Instructions executed after the current alternative has matched
+    successfully. There is similar code in compile_bracket_matchingpath. */
+    if (opcode == OP_ONCE)
+      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
+
+    stacksize = 0;
+    if (repeat_type == OP_MINUPTO)
+      {
+      /* We need to preserve the counter. TMP2 will be used below. */
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
+      stacksize++;
+      }
+    if (ket != OP_KET || bra != OP_BRA)
+      stacksize++;
+    if (offset != 0)
+      {
+      if (common->capture_last_ptr != 0)
+        stacksize++;
+      if (common->optimized_cbracket[offset >> 1] == 0)
+        stacksize += 2;
+      }
+    if (opcode != OP_ONCE)
+      stacksize++;
+
+    if (stacksize > 0)
+      allocate_stack(common, stacksize);
+
+    stacksize = 0;
+    if (repeat_type == OP_MINUPTO)
+      {
+      /* TMP2 was set above. */
+      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
+      stacksize++;
+      }
+
+    if (ket != OP_KET || bra != OP_BRA)
+      {
+      if (ket != OP_KET)
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
+      else
+        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
+      stacksize++;
+      }
+
+    if (offset != 0)
+      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
+
+    if (opcode != OP_ONCE)
+      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
+
+    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
+      {
+      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
+      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+      }
+
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
+
+    if (opcode != OP_ONCE)
+      {
+      if (alt_max > 4)
+        add_label_addr(common, next_update_addr++);
+      else
+        {
+        if (alt_count != 2 * sizeof(sljit_uw))
+          {
+          JUMPHERE(alt1);
+          if (alt_max == 3 && alt_count == sizeof(sljit_uw))
+            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
+          }
+        else
+          {
+          JUMPHERE(alt2);
+          if (alt_max == 4)
+            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
+          }
+        }
+      alt_count += sizeof(sljit_uw);
+      }
+
+    COMPILE_BACKTRACKINGPATH(current->top);
+    if (current->topbacktracks)
+      set_jumps(current->topbacktracks, LABEL());
+    SLJIT_ASSERT(!current->nextbacktracks);
+    }
+  while (*cc == OP_ALT);
+
+  if (cond != NULL)
+    {
+    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
+    assert = CURRENT_AS(bracket_backtrack)->u.assert;
+    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
+      {
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
+      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+      }
+    JUMPHERE(cond);
+    }
+
+  /* Free the STR_PTR. */
+  if (private_data_ptr == 0)
+    free_stack(common, 1);
+  }
+
+if (offset != 0)
+  {
+  /* Using both tmp registers is better for instruction scheduling. */
+  if (common->optimized_cbracket[offset >> 1] != 0)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    free_stack(common, 2);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
+    }
+  else
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
+    }
+  }
+else if (opcode == OP_SBRA || opcode == OP_SCOND)
+  {
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
+  free_stack(common, 1);
+  }
+else if (opcode == OP_ONCE)
+  {
+  cc = ccbegin + GET(ccbegin, 1);
+  stacksize = needs_control_head ? 1 : 0;
+
+  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
+    {
+    /* Reset head and drop saved frame. */
+    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
+    }
+  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
+    {
+    /* The STR_PTR must be released. */
+    stacksize++;
+    }
+  free_stack(common, stacksize);
+
+  JUMPHERE(once);
+  /* Restore the previous private_data_ptr. */
+  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
+  else if (ket == OP_KETRMIN)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    /* See the comment below. */
+    free_stack(common, 2);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
+    }
+  }
+
+if (repeat_type == OP_EXACT)
+  {
+  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
+  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
+  }
+else if (ket == OP_KETRMAX)
+  {
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  if (bra != OP_BRAZERO)
+    free_stack(common, 1);
+
+  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+  if (bra == OP_BRAZERO)
+    {
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
+    JUMPHERE(brazero);
+    free_stack(common, 1);
+    }
+  }
+else if (ket == OP_KETRMIN)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  /* OP_ONCE removes everything in case of a backtrack, so we don't
+  need to explicitly release the STR_PTR. An extra release would
+  badly affect the free_stack(2) above. */
+  if (opcode != OP_ONCE)
+    free_stack(common, 1);
+  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
+  if (opcode == OP_ONCE)
+    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
+  else if (bra == OP_BRAMINZERO)
+    free_stack(common, 1);
+  }
+else if (bra == OP_BRAZERO)
+  {
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
+  JUMPHERE(brazero);
+  }
+}
+
+static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+int offset;
+struct sljit_jump *jump;
+
+if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
+  {
+  if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
+    {
+    offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+    if (common->capture_last_ptr != 0)
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
+    if (common->capture_last_ptr != 0)
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
+    }
+  set_jumps(current->topbacktracks, LABEL());
+  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
+  return;
+  }
+
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
+add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+
+if (current->topbacktracks)
+  {
+  jump = JUMP(SLJIT_JUMP);
+  set_jumps(current->topbacktracks, LABEL());
+  /* Drop the stack frame. */
+  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
+  JUMPHERE(jump);
+  }
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
+}
+
+static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+assert_backtrack backtrack;
+
+current->top = NULL;
+current->topbacktracks = NULL;
+current->nextbacktracks = NULL;
+if (current->cc[1] > OP_ASSERTBACK_NOT)
+  {
+  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
+  compile_bracket_matchingpath(common, current->cc, current);
+  compile_bracket_backtrackingpath(common, current->top);
+  }
+else
+  {
+  memset(&backtrack, 0, sizeof(backtrack));
+  backtrack.common.cc = current->cc;
+  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
+  /* Manual call of compile_assert_matchingpath. */
+  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
+  }
+SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
+}
+
+static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+pcre_uchar opcode = *current->cc;
+struct sljit_label *loop;
+struct sljit_jump *jump;
+
+if (opcode == OP_THEN || opcode == OP_THEN_ARG)
+  {
+  if (common->then_trap != NULL)
+    {
+    SLJIT_ASSERT(common->control_head_ptr != 0);
+
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
+    jump = JUMP(SLJIT_JUMP);
+
+    loop = LABEL();
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
+    JUMPHERE(jump);
+    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
+    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
+    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
+    return;
+    }
+  else if (common->positive_assert)
+    {
+    add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
+    return;
+    }
+  }
+
+if (common->local_exit)
+  {
+  if (common->quit_label == NULL)
+    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+  else
+    JUMPTO(SLJIT_JUMP, common->quit_label);
+  return;
+  }
+
+if (opcode == OP_SKIP_ARG)
+  {
+  SLJIT_ASSERT(common->control_head_ptr != 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
+  sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
+  return;
+  }
+
+if (opcode == OP_SKIP)
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+else
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
+add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
+}
+
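For orientation: the THEN-trap search loop above walks a chain of control frames threaded through the match stack, comparing a type tag and a block start stored at fixed negative offsets from each frame until both match. A minimal standalone sketch of that walk, using a plain linked list and purely illustrative names:

#include <stdio.h>

/* Each node mirrors the three slots the generated code inspects:
   the link, the type tag and the block start. */
typedef struct frame {
  struct frame *next;   /* the -sizeof(sljit_sw) slot */
  int type;             /* the -(2 * sizeof(sljit_sw)) slot */
  int start;            /* the -(3 * sizeof(sljit_sw)) slot */
} frame;

static frame *find_then_trap(frame *head, int type, int start)
{
while (head != NULL && (head->type != type || head->start != start))
  head = head->next;
return head;
}

int main(void)
{
frame outer = { NULL, 1, 10 };
frame inner = { &outer, 2, 20 };
printf("%s\n", find_then_trap(&inner, 1, 10) != NULL ? "found" : "missing");
return 0;
}
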
+static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+int size;
+
+if (CURRENT_AS(then_trap_backtrack)->then_trap)
+  {
+  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
+  return;
+  }
+
+size = CURRENT_AS(then_trap_backtrack)->framesize;
+size = 3 + (size < 0 ? 0 : size);
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
+free_stack(common, size);
+jump = JUMP(SLJIT_JUMP);
+
+set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
+/* STACK_TOP is set by THEN. */
+if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
+  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+free_stack(common, 3);
+
+JUMPHERE(jump);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
+}
+
+static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+then_trap_backtrack *save_then_trap = common->then_trap;
+
+while (current)
+  {
+  if (current->nextbacktracks != NULL)
+    set_jumps(current->nextbacktracks, LABEL());
+  switch(*current->cc)
+    {
+    case OP_SET_SOM:
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
+    break;
+
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_EXACT:
+    case OP_POSSTAR:
+    case OP_POSPLUS:
+    case OP_POSQUERY:
+    case OP_POSUPTO:
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_EXACTI:
+    case OP_POSSTARI:
+    case OP_POSPLUSI:
+    case OP_POSQUERYI:
+    case OP_POSUPTOI:
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTEXACT:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSUPTO:
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTEXACTI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTPOSQUERYI:
+    case OP_NOTPOSUPTOI:
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEEXACT:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEPOSQUERY:
+    case OP_TYPEPOSUPTO:
+    case OP_CLASS:
+    case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    case OP_XCLASS:
+#endif
+    compile_iterator_backtrackingpath(common, current);
+    break;
+
+    case OP_REF:
+    case OP_REFI:
+    case OP_DNREF:
+    case OP_DNREFI:
+    compile_ref_iterator_backtrackingpath(common, current);
+    break;
+
+    case OP_RECURSE:
+    compile_recurse_backtrackingpath(common, current);
+    break;
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    compile_assert_backtrackingpath(common, current);
+    break;
+
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_BRA:
+    case OP_CBRA:
+    case OP_COND:
+    case OP_SBRA:
+    case OP_SCBRA:
+    case OP_SCOND:
+    compile_bracket_backtrackingpath(common, current);
+    break;
+
+    case OP_BRAZERO:
+    if (current->cc[1] > OP_ASSERTBACK_NOT)
+      compile_bracket_backtrackingpath(common, current);
+    else
+      compile_assert_backtrackingpath(common, current);
+    break;
+
+    case OP_BRAPOS:
+    case OP_CBRAPOS:
+    case OP_SBRAPOS:
+    case OP_SCBRAPOS:
+    case OP_BRAPOSZERO:
+    compile_bracketpos_backtrackingpath(common, current);
+    break;
+
+    case OP_BRAMINZERO:
+    compile_braminzero_backtrackingpath(common, current);
+    break;
+
+    case OP_MARK:
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
+    if (common->has_skip_arg)
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    free_stack(common, common->has_skip_arg ? 5 : 1);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
+    if (common->has_skip_arg)
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
+    break;
+
+    case OP_THEN:
+    case OP_THEN_ARG:
+    case OP_PRUNE:
+    case OP_PRUNE_ARG:
+    case OP_SKIP:
+    case OP_SKIP_ARG:
+    compile_control_verb_backtrackingpath(common, current);
+    break;
+
+    case OP_COMMIT:
+    if (!common->local_exit)
+      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+    if (common->quit_label == NULL)
+      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+    else
+      JUMPTO(SLJIT_JUMP, common->quit_label);
+    break;
+
+    case OP_CALLOUT:
+    case OP_FAIL:
+    case OP_ACCEPT:
+    case OP_ASSERT_ACCEPT:
+    set_jumps(current->topbacktracks, LABEL());
+    break;
+
+    case OP_THEN_TRAP:
+    /* A virtual opcode for then traps. */
+    compile_then_trap_backtrackingpath(common, current);
+    break;
+
+    default:
+    SLJIT_ASSERT_STOP();
+    break;
+    }
+  current = current->prev;
+  }
+common->then_trap = save_then_trap;
+}
+
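A note on the overall shape: the matching path pushes one backtrack record per construct and links it through ->prev, so the while loop above visits records from the newest to the oldest and the undo code comes out in reverse match order. A minimal sketch of that traversal, with hypothetical names:

#include <stdio.h>

typedef struct record {
  const char *name;
  struct record *prev;
} record;

/* Stands in for the dispatch loop above: one "undo" per record,
   emitted newest-first. */
static void emit_undo(record *current)
{
while (current != NULL)
  {
  printf("undo %s\n", current->name);
  current = current->prev;
  }
}

int main(void)
{
record a = { "first", NULL };
record b = { "second", &a };
record c = { "third", &b };
emit_undo(&c);   /* third, second, first */
return 0;
}
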
+static SLJIT_INLINE void compile_recurse(compiler_common *common)
+{
+DEFINE_COMPILER;
+pcre_uchar *cc = common->start + common->currententry->start;
+pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
+pcre_uchar *ccend = bracketend(cc);
+BOOL needs_control_head;
+int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
+int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
+int alternativesize;
+BOOL needs_frame;
+backtrack_common altbacktrack;
+struct sljit_jump *jump;
+
+/* A recursion captures the THEN verbs inside it. */
+common->then_trap = NULL;
+
+SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
+needs_frame = framesize >= 0;
+if (!needs_frame)
+  framesize = 0;
+alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
+
+SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
+common->currententry->entry = LABEL();
+set_jumps(common->currententry->calls, common->currententry->entry);
+
+sljit_emit_fast_enter(compiler, TMP2, 0);
+allocate_stack(common, private_data_size + framesize + alternativesize);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
+copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
+if (needs_control_head)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
+if (needs_frame)
+  init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
+
+if (alternativesize > 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+
+memset(&altbacktrack, 0, sizeof(backtrack_common));
+common->quit_label = NULL;
+common->accept_label = NULL;
+common->quit = NULL;
+common->accept = NULL;
+altbacktrack.cc = ccbegin;
+cc += GET(cc, 1);
+while (1)
+  {
+  altbacktrack.top = NULL;
+  altbacktrack.topbacktracks = NULL;
+
+  if (altbacktrack.cc != ccbegin)
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+
+  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return;
+
+  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
+
+  compile_backtrackingpath(common, altbacktrack.top);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    return;
+  set_jumps(altbacktrack.topbacktracks, LABEL());
+
+  if (*cc != OP_ALT)
+    break;
+
+  altbacktrack.cc = cc + 1 + LINK_SIZE;
+  cc += GET(cc, 1);
+  }
+
+/* None of them matched. */
+OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
+jump = JUMP(SLJIT_JUMP);
+
+if (common->quit != NULL)
+  {
+  set_jumps(common->quit, LABEL());
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
+  if (needs_frame)
+    {
+    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+    }
+  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
+  common->quit = NULL;
+  add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+  }
+
+set_jumps(common->accept, LABEL());
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
+if (needs_frame)
+  {
+  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
+  }
+OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
+
+JUMPHERE(jump);
+if (common->quit != NULL)
+  set_jumps(common->quit, LABEL());
+copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
+free_stack(common, private_data_size + framesize + alternativesize);
+if (needs_control_head)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
+  }
+else
+  {
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
+  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
+  }
+sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
+}
+
+#undef COMPILE_BACKTRACKINGPATH
+#undef CURRENT_AS
+
+void
+PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
+{
+struct sljit_compiler *compiler;
+backtrack_common rootbacktrack;
+compiler_common common_data;
+compiler_common *common = &common_data;
+const pcre_uint8 *tables = re->tables;
+pcre_study_data *study;
+int private_data_size;
+pcre_uchar *ccend;
+executable_functions *functions;
+void *executable_func;
+sljit_uw executable_size;
+sljit_uw total_length;
+label_addr_list *label_addr;
+struct sljit_label *mainloop_label = NULL;
+struct sljit_label *continue_match_label;
+struct sljit_label *empty_match_found_label = NULL;
+struct sljit_label *empty_match_backtrack_label = NULL;
+struct sljit_label *reset_match_label;
+struct sljit_label *quit_label;
+struct sljit_jump *jump;
+struct sljit_jump *minlength_check_failed = NULL;
+struct sljit_jump *reqbyte_notfound = NULL;
+struct sljit_jump *empty_match = NULL;
+
+SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
+study = extra->study_data;
+
+if (!tables)
+  tables = PRIV(default_tables);
+
+memset(&rootbacktrack, 0, sizeof(backtrack_common));
+memset(common, 0, sizeof(compiler_common));
+rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
+
+common->start = rootbacktrack.cc;
+common->read_only_data_head = NULL;
+common->fcc = tables + fcc_offset;
+common->lcc = (sljit_sw)(tables + lcc_offset);
+common->mode = mode;
+common->might_be_empty = study->minlength == 0;
+common->nltype = NLTYPE_FIXED;
+switch(re->options & PCRE_NEWLINE_BITS)
+  {
+  case 0:
+  /* Compile-time default */
+  switch(NEWLINE)
+    {
+    case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
+    case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
+    default: common->newline = NEWLINE; break;
+    }
+  break;
+  case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
+  case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
+  case PCRE_NEWLINE_CR+
+       PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
+  case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
+  case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
+  default: return;
+  }
+common->nlmax = READ_CHAR_MAX;
+common->nlmin = 0;
+if ((re->options & PCRE_BSR_ANYCRLF) != 0)
+  common->bsr_nltype = NLTYPE_ANYCRLF;
+else if ((re->options & PCRE_BSR_UNICODE) != 0)
+  common->bsr_nltype = NLTYPE_ANY;
+else
+  {
+#ifdef BSR_ANYCRLF
+  common->bsr_nltype = NLTYPE_ANYCRLF;
+#else
+  common->bsr_nltype = NLTYPE_ANY;
+#endif
+  }
+common->bsr_nlmax = READ_CHAR_MAX;
+common->bsr_nlmin = 0;
+common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+common->ctypes = (sljit_sw)(tables + ctypes_offset);
+common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
+common->name_count = re->name_count;
+common->name_entry_size = re->name_entry_size;
+common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
+#ifdef SUPPORT_UTF
+/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
+common->utf = (re->options & PCRE_UTF8) != 0;
+#ifdef SUPPORT_UCP
+common->use_ucp = (re->options & PCRE_UCP) != 0;
+#endif
+if (common->utf)
+  {
+  if (common->nltype == NLTYPE_ANY)
+    common->nlmax = 0x2029;
+  else if (common->nltype == NLTYPE_ANYCRLF)
+    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
+  else
+    {
+    /* We only care about the first newline character. */
+    common->nlmax = common->newline & 0xff;
+    }
+
+  if (common->nltype == NLTYPE_FIXED)
+    common->nlmin = common->newline & 0xff;
+  else
+    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
+
+  if (common->bsr_nltype == NLTYPE_ANY)
+    common->bsr_nlmax = 0x2029;
+  else
+    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
+  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
+  }
+#endif /* SUPPORT_UTF */
+ccend = bracketend(common->start);
+
+/* Calculate the local space size on the stack. */
+common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
+common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
+if (!common->optimized_cbracket)
+  return;
+#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
+memset(common->optimized_cbracket, 0, re->top_bracket + 1);
+#else
+memset(common->optimized_cbracket, 1, re->top_bracket + 1);
+#endif
+
+SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
+#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
+common->capture_last_ptr = common->ovector_start;
+common->ovector_start += sizeof(sljit_sw);
+#endif
+if (!check_opcode_types(common, common->start, ccend))
+  {
+  SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+  return;
+  }
+
+/* Checking flags and updating ovector_start. */
+if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
+  {
+  common->req_char_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+if (mode != JIT_COMPILE)
+  {
+  common->start_used_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  if (mode == JIT_PARTIAL_SOFT_COMPILE)
+    {
+    common->hit_start = common->ovector_start;
+    common->ovector_start += 2 * sizeof(sljit_sw);
+    }
+  else
+    {
+    SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE);
+    common->needs_start_ptr = TRUE;
+    }
+  }
+if ((re->options & PCRE_FIRSTLINE) != 0)
+  {
+  common->first_line_end = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+common->control_head_ptr = 1;
+#endif
+if (common->control_head_ptr != 0)
+  {
+  common->control_head_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+if (common->needs_start_ptr && common->has_set_som)
+  {
+  /* Saving the real start pointer is necessary. */
+  common->start_ptr = common->ovector_start;
+  common->ovector_start += sizeof(sljit_sw);
+  }
+else
+  common->needs_start_ptr = FALSE;
+
+/* Aligning the ovector to an even number of sljit words. */
+if ((common->ovector_start & sizeof(sljit_sw)) != 0)
+  common->ovector_start += sizeof(sljit_sw);
+
+if (common->start_ptr == 0)
+  common->start_ptr = OVECTOR(0);
+
+/* Capturing brackets cannot be optimized if callouts are allowed. */
+if (common->capture_last_ptr != 0)
+  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
+
+SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
+common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
+
+total_length = ccend - common->start;
+common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)), compiler->allocator_data);
+if (!common->private_data_ptrs)
+  {
+  SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+  return;
+  }
+memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
+
+private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
+set_private_data_ptrs(common, &private_data_size, ccend);
+if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
+  {
+  SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
+  SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+  return;
+  }
+
+if (common->has_then)
+  {
+  common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
+  memset(common->then_offsets, 0, total_length);
+  set_then_offsets(common, common->start, NULL);
+  }
+
+compiler = sljit_create_compiler(NULL);
+if (!compiler)
+  {
+  SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+  SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
+  return;
+  }
+common->compiler = compiler;
+
+/* Main pcre_jit_exec entry. */
+sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
+
+/* Register init. */
+reset_ovector(common, (re->top_bracket + 1) * 2);
+if (common->req_char_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
+
+OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
+OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
+
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
+if (common->mark_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
+if (common->control_head_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
+
+/* Main part of the matching */
+if ((re->options & PCRE_ANCHORED) == 0)
+  {
+  mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+  continue_match_label = LABEL();
+  /* Forward search if possible. */
+  if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
+    {
+    if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
+      ;
+    else if ((re->flags & PCRE_FIRSTSET) != 0)
+      fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
+    else if ((re->flags & PCRE_STARTLINE) != 0)
+      fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
+    else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
+      fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
+    }
+  }
+else
+  continue_match_label = LABEL();
+
+if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
+  {
+  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
+  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
+  }
+if (common->req_char_ptr != 0)
+  reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
+
+/* Store the current STR_PTR in OVECTOR(0). */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
+/* Copy the limit of allowed recursions. */
+OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
+if (common->capture_last_ptr != 0)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
+
+if (common->needs_start_ptr)
+  {
+  SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
+  }
+else
+  SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
+
+/* Copy the beginning of the string. */
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
+  JUMPHERE(jump);
+  }
+else if (mode == JIT_PARTIAL_HARD_COMPILE)
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+
+compile_matchingpath(common, common->start, ccend, &rootbacktrack);
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  {
+  sljit_free_compiler(compiler);
+  SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+  SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
+  free_read_only_data(common->read_only_data_head, compiler->allocator_data);
+  return;
+  }
+
+if (common->might_be_empty)
+  {
+  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
+  empty_match_found_label = LABEL();
+  }
+
+common->accept_label = LABEL();
+if (common->accept != NULL)
+  set_jumps(common->accept, common->accept_label);
+
+/* This means we have a match. Update the ovector. */
+copy_ovector(common, re->top_bracket + 1);
+common->quit_label = common->forced_quit_label = LABEL();
+if (common->quit != NULL)
+  set_jumps(common->quit, common->quit_label);
+if (common->forced_quit != NULL)
+  set_jumps(common->forced_quit, common->forced_quit_label);
+if (minlength_check_failed != NULL)
+  SET_LABEL(minlength_check_failed, common->forced_quit_label);
+sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
+
+if (mode != JIT_COMPILE)
+  {
+  common->partialmatchlabel = LABEL();
+  set_jumps(common->partialmatch, common->partialmatchlabel);
+  return_with_partial_match(common, common->quit_label);
+  }
+
+if (common->might_be_empty)
+  empty_match_backtrack_label = LABEL();
+compile_backtrackingpath(common, rootbacktrack.top);
+if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+  {
+  sljit_free_compiler(compiler);
+  SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+  SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
+  free_read_only_data(common->read_only_data_head, compiler->allocator_data);
+  return;
+  }
+
+SLJIT_ASSERT(rootbacktrack.prev == NULL);
+reset_match_label = LABEL();
+
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  {
+  /* Update hit_start only the first time. */
+  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
+  JUMPHERE(jump);
+  }
+
+/* Check we have remaining characters. */
+if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
+  {
+  SLJIT_ASSERT(common->first_line_end != 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  }
+
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+
+if ((re->options & PCRE_ANCHORED) == 0)
+  {
+  if (common->ff_newline_shortcut != NULL)
+    {
+    if ((re->options & PCRE_FIRSTLINE) == 0)
+      CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
+    /* There cannot be more newlines here. */
+    }
+  else
+    {
+    if ((re->options & PCRE_FIRSTLINE) == 0)
+      CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+    else
+      CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+    }
+  }
+
+/* No more remaining characters. */
+if (reqbyte_notfound != NULL)
+  JUMPHERE(reqbyte_notfound);
+
+if (mode == JIT_PARTIAL_SOFT_COMPILE)
+  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
+
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
+JUMPTO(SLJIT_JUMP, common->quit_label);
+
+flush_stubs(common);
+
+if (common->might_be_empty)
+  {
+  JUMPHERE(empty_match);
+  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
+  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
+  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
+  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
+  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
+  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
+  }
+
+common->currententry = common->entries;
+common->local_exit = TRUE;
+quit_label = common->quit_label;
+while (common->currententry != NULL)
+  {
+  /* Might add new entries. */
+  compile_recurse(common);
+  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+    {
+    sljit_free_compiler(compiler);
+    SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+    SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
+    free_read_only_data(common->read_only_data_head, compiler->allocator_data);
+    return;
+    }
+  flush_stubs(common);
+  common->currententry = common->currententry->next;
+  }
+common->local_exit = FALSE;
+common->quit_label = quit_label;
+
+/* Allocates the stack; returns with PCRE_ERROR_JIT_STACKLIMIT if it fails. */
+/* This is a (really) rare case. */
+set_jumps(common->stackalloc, LABEL());
+/* RETURN_ADDR is not a saved register. */
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
+OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
+
+sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
+jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
+OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+
+/* Allocation failed. */
+JUMPHERE(jump);
+/* We break the return address cache here, but this is a really rare case. */
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
+JUMPTO(SLJIT_JUMP, common->quit_label);
+
+/* Call limit reached. */
+set_jumps(common->calllimit, LABEL());
+OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
+JUMPTO(SLJIT_JUMP, common->quit_label);
+
+if (common->revertframes != NULL)
+  {
+  set_jumps(common->revertframes, LABEL());
+  do_revertframes(common);
+  }
+if (common->wordboundary != NULL)
+  {
+  set_jumps(common->wordboundary, LABEL());
+  check_wordboundary(common);
+  }
+if (common->anynewline != NULL)
+  {
+  set_jumps(common->anynewline, LABEL());
+  check_anynewline(common);
+  }
+if (common->hspace != NULL)
+  {
+  set_jumps(common->hspace, LABEL());
+  check_hspace(common);
+  }
+if (common->vspace != NULL)
+  {
+  set_jumps(common->vspace, LABEL());
+  check_vspace(common);
+  }
+if (common->casefulcmp != NULL)
+  {
+  set_jumps(common->casefulcmp, LABEL());
+  do_casefulcmp(common);
+  }
+if (common->caselesscmp != NULL)
+  {
+  set_jumps(common->caselesscmp, LABEL());
+  do_caselesscmp(common);
+  }
+if (common->reset_match != NULL)
+  {
+  set_jumps(common->reset_match, LABEL());
+  do_reset_match(common, (re->top_bracket + 1) * 2);
+  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
+  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+  JUMPTO(SLJIT_JUMP, reset_match_label);
+  }
+#ifdef SUPPORT_UTF
+#ifdef COMPILE_PCRE8
+if (common->utfreadchar != NULL)
+  {
+  set_jumps(common->utfreadchar, LABEL());
+  do_utfreadchar(common);
+  }
+if (common->utfreadchar16 != NULL)
+  {
+  set_jumps(common->utfreadchar16, LABEL());
+  do_utfreadchar16(common);
+  }
+if (common->utfreadtype8 != NULL)
+  {
+  set_jumps(common->utfreadtype8, LABEL());
+  do_utfreadtype8(common);
+  }
+#endif /* COMPILE_PCRE8 */
+#endif /* SUPPORT_UTF */
+#ifdef SUPPORT_UCP
+if (common->getucd != NULL)
+  {
+  set_jumps(common->getucd, LABEL());
+  do_getucd(common);
+  }
+#endif
+
+SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
+SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
+
+executable_func = sljit_generate_code(compiler);
+executable_size = sljit_get_generated_code_size(compiler);
+label_addr = common->label_addrs;
+while (label_addr != NULL)
+  {
+  *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
+  label_addr = label_addr->next;
+  }
+sljit_free_compiler(compiler);
+if (executable_func == NULL)
+  {
+  free_read_only_data(common->read_only_data_head, compiler->allocator_data);
+  return;
+  }
+
+/* Reuse the function descriptor if possible. */
+if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
+  functions = (executable_functions *)extra->executable_jit;
+else
+  {
+  /* Note: If your memory-checker has flagged the allocation below as a
+   * memory leak, it is probably because you either forgot to call
+   * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
+   * pcre16_extra) object, or you called said function after having
+   * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
+   * of the object. (The function will only free the JIT data if the
+   * bit remains set, as the bit indicates that the pointer to the data
+   * is valid.)
+   */
+  functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
+  if (functions == NULL)
+    {
+    /* This case is highly unlikely since we just recently
+    freed a lot of memory. Not impossible though. */
+    sljit_free_code(executable_func);
+    free_read_only_data(common->read_only_data_head, compiler->allocator_data);
+    return;
+    }
+  memset(functions, 0, sizeof(executable_functions));
+  functions->top_bracket = (re->top_bracket + 1) * 2;
+  functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
+  extra->executable_jit = functions;
+  extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
+  }
+
+functions->executable_funcs[mode] = executable_func;
+functions->read_only_data_heads[mode] = common->read_only_data_head;
+functions->executable_sizes[mode] = executable_size;
+}
+
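One easily misread detail in the layout code above: the test (common->ovector_start & sizeof(sljit_sw)) checks a single bit, which suffices to round a word-multiple byte offset up to an even number of words. A standalone check of that arithmetic, with long standing in for sljit_sw:

#include <assert.h>
#include <stdio.h>

int main(void)
{
size_t w = sizeof(long);   /* stand-in for sizeof(sljit_sw) */
size_t offsets[] = { 0, w, 2 * w, 3 * w, 5 * w };
size_t i;
for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
  {
  size_t ov = offsets[i];
  if ((ov & w) != 0)
    ov += w;   /* the same bump as in the code above */
  assert(ov % (2 * w) == 0);
  printf("%lu -> %lu\n", (unsigned long)offsets[i], (unsigned long)ov);
  }
return 0;
}
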
+static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
+{
+union {
+   void *executable_func;
+   jit_function call_executable_func;
+} convert_executable_func;
+pcre_uint8 local_space[MACHINE_STACK_SIZE];
+struct sljit_stack local_stack;
+
+local_stack.top = (sljit_sw)&local_space;
+local_stack.base = local_stack.top;
+local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
+local_stack.max_limit = local_stack.limit;
+arguments->stack = &local_stack;
+convert_executable_func.executable_func = executable_func;
+return convert_executable_func.call_executable_func(arguments);
+}
+
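jit_machine_stack_exec() above (and PRIV(jit_exec) below) converts the stored void * back into something callable through a union rather than a cast, because ISO C defines no conversion between object and function pointers. A standalone sketch of the same pattern; taking a function's address as void * in the demo is itself only a common-platform convention, and all names are illustrative:

#include <stdio.h>

typedef int (*jit_function)(int);

static int double_it(int x) { return 2 * x; }

int main(void)
{
union {
  void *executable_func;
  jit_function call_executable_func;
} convert_executable_func;

convert_executable_func.executable_func = (void *)double_it;
printf("%d\n", convert_executable_func.call_executable_func(21));   /* 42 */
return 0;
}
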
+int
+PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
+  int length, int start_offset, int options, int *offsets, int offset_count)
+{
+executable_functions *functions = (executable_functions *)extra_data->executable_jit;
+union {
+   void *executable_func;
+   jit_function call_executable_func;
+} convert_executable_func;
+jit_arguments arguments;
+int max_offset_count;
+int retval;
+int mode = JIT_COMPILE;
+
+if ((options & PCRE_PARTIAL_HARD) != 0)
+  mode = JIT_PARTIAL_HARD_COMPILE;
+else if ((options & PCRE_PARTIAL_SOFT) != 0)
+  mode = JIT_PARTIAL_SOFT_COMPILE;
+
+if (functions->executable_funcs[mode] == NULL)
+  return PCRE_ERROR_JIT_BADOPTION;
+
+/* Sanity checks should be handled by pcre_exec. */
+arguments.str = subject + start_offset;
+arguments.begin = subject;
+arguments.end = subject + length;
+arguments.mark_ptr = NULL;
+/* JIT decreases this value less frequently than the interpreter. */
+arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
+if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
+  arguments.limit_match = functions->limit_match;
+arguments.notbol = (options & PCRE_NOTBOL) != 0;
+arguments.noteol = (options & PCRE_NOTEOL) != 0;
+arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
+arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
+arguments.offsets = offsets;
+arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
+arguments.real_offset_count = offset_count;
+
+/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
+the output vector for storing captured strings, with the remainder used as
+workspace. We don't need the workspace here. For compatibility, we limit the
+number of captured strings in the same way as pcre_exec(), so that the user
+gets the same result with and without JIT. */
+
+if (offset_count != 2)
+  offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
+max_offset_count = functions->top_bracket;
+if (offset_count > max_offset_count)
+  offset_count = max_offset_count;
+arguments.offset_count = offset_count;
+
+if (functions->callback)
+  arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
+else
+  arguments.stack = (struct sljit_stack *)functions->userdata;
+
+if (arguments.stack == NULL)
+  retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
+else
+  {
+  convert_executable_func.executable_func = functions->executable_funcs[mode];
+  retval = convert_executable_func.call_executable_func(&arguments);
+  }
+
+if (retval * 2 > offset_count)
+  retval = 0;
+if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
+  *(extra_data->mark) = arguments.mark_ptr;
+
+return retval;
+}
+
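The offset_count clamp above is plain arithmetic, and a few worked values may help: of every 3 ints in the output vector only 2 are usable for captures, with offset_count == 2 special-cased to mean exactly one pair. A standalone check:

#include <stdio.h>

int main(void)
{
int sizes[] = { 2, 3, 10, 30 };
int i;
for (i = 0; i < 4; i++)
  {
  int offset_count = sizes[i];
  if (offset_count != 2)
    offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
  printf("offset_count %2d -> %2d usable (%d pairs)\n",
    sizes[i], offset_count, offset_count / 2);
  }
return 0;
}
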
+#if defined COMPILE_PCRE8
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
+  PCRE_SPTR subject, int length, int start_offset, int options,
+  int *offsets, int offset_count, pcre_jit_stack *stack)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
+  PCRE_SPTR16 subject, int length, int start_offset, int options,
+  int *offsets, int offset_count, pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+  PCRE_SPTR32 subject, int length, int start_offset, int options,
+  int *offsets, int offset_count, pcre32_jit_stack *stack)
+#endif
+{
+pcre_uchar *subject_ptr = (pcre_uchar *)subject;
+executable_functions *functions = (executable_functions *)extra_data->executable_jit;
+union {
+   void *executable_func;
+   jit_function call_executable_func;
+} convert_executable_func;
+jit_arguments arguments;
+int max_offset_count;
+int retval;
+int mode = JIT_COMPILE;
+
+SLJIT_UNUSED_ARG(argument_re);
+
+/* Plausibility checks */
+if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
+
+if ((options & PCRE_PARTIAL_HARD) != 0)
+  mode = JIT_PARTIAL_HARD_COMPILE;
+else if ((options & PCRE_PARTIAL_SOFT) != 0)
+  mode = JIT_PARTIAL_SOFT_COMPILE;
+
+if (functions->executable_funcs[mode] == NULL)
+  return PCRE_ERROR_JIT_BADOPTION;
+
+/* Sanity checks should be handled by pcre_exec. */
+arguments.stack = (struct sljit_stack *)stack;
+arguments.str = subject_ptr + start_offset;
+arguments.begin = subject_ptr;
+arguments.end = subject_ptr + length;
+arguments.mark_ptr = NULL;
+/* JIT decreases this value less frequently than the interpreter. */
+arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit);
+if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
+  arguments.limit_match = functions->limit_match;
+arguments.notbol = (options & PCRE_NOTBOL) != 0;
+arguments.noteol = (options & PCRE_NOTEOL) != 0;
+arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
+arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
+arguments.offsets = offsets;
+arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
+arguments.real_offset_count = offset_count;
+
+/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
+the output vector for storing captured strings, with the remainder used as
+workspace. We don't need the workspace here. For compatibility, we limit the
+number of captured strings in the same way as pcre_exec(), so that the user
+gets the same result with and without JIT. */
+
+if (offset_count != 2)
+  offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
+max_offset_count = functions->top_bracket;
+if (offset_count > max_offset_count)
+  offset_count = max_offset_count;
+arguments.offset_count = offset_count;
+
+convert_executable_func.executable_func = functions->executable_funcs[mode];
+retval = convert_executable_func.call_executable_func(&arguments);
+
+if (retval * 2 > offset_count)
+  retval = 0;
+if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
+  *(extra_data->mark) = arguments.mark_ptr;
+
+return retval;
+}
+
+void
+PRIV(jit_free)(void *executable_funcs)
+{
+int i;
+executable_functions *functions = (executable_functions *)executable_funcs;
+for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
+  {
+  if (functions->executable_funcs[i] != NULL)
+    sljit_free_code(functions->executable_funcs[i]);
+  free_read_only_data(functions->read_only_data_heads[i], NULL);
+  }
+SLJIT_FREE(functions, compiler->allocator_data);
+}
+
+int
+PRIV(jit_get_size)(void *executable_funcs)
+{
+int i;
+sljit_uw size = 0;
+sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
+for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
+  size += executable_sizes[i];
+return (int)size;
+}
+
+const char*
+PRIV(jit_get_target)(void)
+{
+return sljit_get_platform_name();
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL pcre_jit_stack *
+pcre_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL pcre16_jit_stack *
+pcre16_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL pcre32_jit_stack *
+pcre32_jit_stack_alloc(int startsize, int maxsize)
+#endif
+{
+if (startsize < 1 || maxsize < 1)
+  return NULL;
+if (startsize > maxsize)
+  startsize = maxsize;
+startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
+maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
+return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
+}
+
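The size rounding above relies on STACK_GROWTH_RATE being a power of two, so that (x + R - 1) & ~(R - 1) rounds x up to the next multiple of R. A standalone check, with 8192 as an illustrative stand-in for the real constant:

#include <stdio.h>

int main(void)
{
const int rate = 8192;   /* illustrative stand-in for STACK_GROWTH_RATE */
int sizes[] = { 1, 8192, 8193, 20000 };
int i;
for (i = 0; i < 4; i++)
  printf("%5d -> %5d\n", sizes[i], (sizes[i] + rate - 1) & ~(rate - 1));
return 0;
}
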
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_stack_free(pcre_jit_stack *stack)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_stack_free(pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_stack_free(pcre32_jit_stack *stack)
+#endif
+{
+sljit_free_stack((struct sljit_stack *)stack, NULL);
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
+#endif
+{
+executable_functions *functions;
+if (extra != NULL &&
+    (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
+    extra->executable_jit != NULL)
+  {
+  functions = (executable_functions *)extra->executable_jit;
+  functions->callback = callback;
+  functions->userdata = userdata;
+  }
+}
+
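A hedged sketch of how these entry points combine in a caller, for the 8-bit library; error handling is elided and the sizes are arbitrary examples:

#include <pcre.h>

int match_with_jit_stack(const char *pattern, const char *subject, int length)
{
const char *error;
int erroffset;
int ovector[30];
pcre *re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
pcre_extra *extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
pcre_jit_stack *stack = pcre_jit_stack_alloc(32 * 1024, 512 * 1024);
int rc;

pcre_assign_jit_stack(extra, NULL, stack);   /* no callback, fixed stack */
rc = pcre_exec(re, extra, subject, length, 0, 0, ovector, 30);

pcre_jit_stack_free(stack);
pcre_free_study(extra);
pcre_free(re);
return rc;
}
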
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_free_unused_memory(void)
+#endif
+{
+sljit_free_unused_memory_exec();
+}
+
+#else  /* SUPPORT_JIT */
+
+/* These are dummy functions to avoid linking errors when JIT support is not
+being compiled. */
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL pcre_jit_stack *
+pcre_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL pcre16_jit_stack *
+pcre16_jit_stack_alloc(int startsize, int maxsize)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL pcre32_jit_stack *
+pcre32_jit_stack_alloc(int startsize, int maxsize)
+#endif
+{
+(void)startsize;
+(void)maxsize;
+return NULL;
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_stack_free(pcre_jit_stack *stack)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_stack_free(pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_stack_free(pcre32_jit_stack *stack)
+#endif
+{
+(void)stack;
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
+#endif
+{
+(void)extra;
+(void)callback;
+(void)userdata;
+}
+
+#if defined COMPILE_PCRE8
+PCRE_EXP_DECL void
+pcre_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DECL void
+pcre16_jit_free_unused_memory(void)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DECL void
+pcre32_jit_free_unused_memory(void)
+#endif
+{
+}
+
+#endif
+
+/* End of pcre_jit_compile.c */
index f19d9fbb902a96c8d8aa65d827f662bfda9ac985..998fe2325ef240905aba0bf722dbbcdf3d01b0e1 100644 (file)
@@ -70,7 +70,7 @@ Arguments:
   code            pointer to start of group (the bracket)
   startcode       pointer to start of the whole pattern's code
   options         the compiling options
-  int             RECURSE depth
+  recurses        chain of recurse_check to catch mutual recursion
 
 Returns:   the minimum length
            -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@@ -80,12 +80,13 @@ Returns:   the minimum length
 
 static int
 find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
-  const pcre_uchar *startcode, int options, int recurse_depth)
+  const pcre_uchar *startcode, int options, recurse_check *recurses)
 {
 int length = -1;
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 BOOL utf = (options & PCRE_UTF8) != 0;
 BOOL had_recurse = FALSE;
+recurse_check this_recurse;
 register int branchlength = 0;
 register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
 
@@ -130,7 +131,7 @@ for (;;)
     case OP_SBRAPOS:
     case OP_ONCE:
     case OP_ONCE_NC:
-    d = find_minlength(re, cc, startcode, options, recurse_depth);
+    d = find_minlength(re, cc, startcode, options, recurses);
     if (d < 0) return d;
     branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -393,7 +394,7 @@ for (;;)
         ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
         if (cs == NULL) return -2;
         do ce += GET(ce, 1); while (*ce == OP_ALT);
-        if (cc > cs && cc < ce)
+        if (cc > cs && cc < ce)     /* Simple recursion */
           {
           d = 0;
           had_recurse = TRUE;
@@ -401,8 +402,22 @@ for (;;)
           }
         else
           {
-          int dd = find_minlength(re, cs, startcode, options, recurse_depth);
-          if (dd < d) d = dd;
+          recurse_check *r = recurses;
+          for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+          if (r != NULL)           /* Mutual recursion */
+            {
+            d = 0;
+            had_recurse = TRUE;
+            break;
+            }
+          else
+            {
+            int dd;
+            this_recurse.prev = recurses;
+            this_recurse.group = cs;
+            dd = find_minlength(re, cs, startcode, options, &this_recurse);
+            if (dd < d) d = dd;
+            }
           }
         slot += re->name_entry_size;
         }
@@ -418,14 +433,26 @@ for (;;)
       ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
       if (cs == NULL) return -2;
       do ce += GET(ce, 1); while (*ce == OP_ALT);
-      if (cc > cs && cc < ce)
+      if (cc > cs && cc < ce)    /* Simple recursion */
         {
         d = 0;
         had_recurse = TRUE;
         }
       else
         {
-        d = find_minlength(re, cs, startcode, options, recurse_depth);
+        recurse_check *r = recurses;
+        for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+        if (r != NULL)           /* Mutual recursion */
+          {
+          d = 0;
+          had_recurse = TRUE;
+          }
+        else
+          {
+          this_recurse.prev = recurses;
+          this_recurse.group = cs;
+          d = find_minlength(re, cs, startcode, options, &this_recurse);
+          }
         }
       }
     else d = 0;
@@ -474,12 +501,21 @@ for (;;)
     case OP_RECURSE:
     cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
     do ce += GET(ce, 1); while (*ce == OP_ALT);
-    if ((cc > cs && cc < ce) || recurse_depth > 10)
+    if (cc > cs && cc < ce)    /* Simple recursion */
       had_recurse = TRUE;
     else
       {
-      branchlength += find_minlength(re, cs, startcode, options,
-        recurse_depth + 1);
+      recurse_check *r = recurses;
+      for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+      if (r != NULL)           /* Mutual recursion */
+        had_recurse = TRUE;
+      else
+        {
+        this_recurse.prev = recurses;
+        this_recurse.group = cs;
+        branchlength += find_minlength(re, cs, startcode, options,
+          &this_recurse);
+        }
       }
     cc += 1 + LINK_SIZE;
     break;
@@ -1503,7 +1539,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
 
 /* Find the minimum length of subject string. */
 
-switch(min = find_minlength(re, code, code, re->options, 0))
+switch(min = find_minlength(re, code, code, re->options, NULL))
   {
   case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
   case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
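
The hunks above replace the old fixed recursion cap (recurse_depth > 10) with exact cycle detection: each active find_minlength call pushes a stack-allocated recurse_check node onto a chain, and any group already on the chain is treated as mutual recursion instead of being re-entered. A minimal standalone sketch of the same pattern; the opaque group pointer and driver function are illustrative, not taken from the patch:

/* Sketch of the cycle-detection pattern introduced above: one
   recurse_check node per active call, linked through the C stack. */
#include <stddef.h>

typedef struct recurse_check {
  struct recurse_check *prev;
  const void *group;              /* start of the group being measured */
} recurse_check;

static int measure_group(const void *group, recurse_check *recurses)
{
  recurse_check this_recurse;
  recurse_check *r;

  /* If this group is already on the chain, we have found mutual
     recursion and must not descend into it again. */
  for (r = recurses; r != NULL; r = r->prev)
    if (r->group == group) return 0;

  this_recurse.prev = recurses;   /* push this call onto the chain */
  this_recurse.group = group;
  /* ... recurse into each referenced group, passing &this_recurse so
     deeper calls can see that this group is active ... */
  (void)this_recurse;
  return 0;                       /* placeholder minimum length */
}

Because each node lives on the stack of the call it describes, the chain needs no allocation and unwinds automatically, which is why the arbitrary depth cap could be dropped.
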
diff --git a/ext/pcre/pcrelib/sljit/sljitConfig.h b/ext/pcre/pcrelib/sljit/sljitConfig.h
new file mode 100644 (file)
index 0000000..10364c3
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_CONFIG_H_
+#define _SLJIT_CONFIG_H_
+
+/* --------------------------------------------------------------------- */
+/*  Custom defines                                                       */
+/* --------------------------------------------------------------------- */
+
+/* Put your custom defines here. This empty section will never change,
+   which helps when maintaining patches (with diff / patch utilities). */
+
+/* --------------------------------------------------------------------- */
+/*  Architecture                                                         */
+/* --------------------------------------------------------------------- */
+
+/* Architecture selection. */
+/* #define SLJIT_CONFIG_X86_32 1 */
+/* #define SLJIT_CONFIG_X86_64 1 */
+/* #define SLJIT_CONFIG_ARM_V5 1 */
+/* #define SLJIT_CONFIG_ARM_V7 1 */
+/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
+/* #define SLJIT_CONFIG_ARM_64 1 */
+/* #define SLJIT_CONFIG_PPC_32 1 */
+/* #define SLJIT_CONFIG_PPC_64 1 */
+/* #define SLJIT_CONFIG_MIPS_32 1 */
+/* #define SLJIT_CONFIG_MIPS_64 1 */
+/* #define SLJIT_CONFIG_SPARC_32 1 */
+/* #define SLJIT_CONFIG_TILEGX 1 */
+
+/* #define SLJIT_CONFIG_AUTO 1 */
+/* #define SLJIT_CONFIG_UNSUPPORTED 1 */
+
+/* --------------------------------------------------------------------- */
+/*  Utilities                                                            */
+/* --------------------------------------------------------------------- */
+
+/* Useful for thread-safe compiling of global functions. */
+#ifndef SLJIT_UTIL_GLOBAL_LOCK
+/* Enabled by default */
+#define SLJIT_UTIL_GLOBAL_LOCK 1
+#endif
+
+/* Implements a stack-like data structure (by using mmap / VirtualAlloc). */
+#ifndef SLJIT_UTIL_STACK
+/* Enabled by default */
+#define SLJIT_UTIL_STACK 1
+#endif
+
+/* Single threaded application. Does not require any locks. */
+#ifndef SLJIT_SINGLE_THREADED
+/* Disabled by default. */
+#define SLJIT_SINGLE_THREADED 0
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Configuration                                                        */
+/* --------------------------------------------------------------------- */
+
+/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should
+   define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, and NULL. */
+#ifndef SLJIT_STD_MACROS_DEFINED
+/* Disabled by default. */
+#define SLJIT_STD_MACROS_DEFINED 0
+#endif
+
+/* Executable code allocation:
+   If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
+   define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */
+#ifndef SLJIT_EXECUTABLE_ALLOCATOR
+/* Enabled by default. */
+#define SLJIT_EXECUTABLE_ALLOCATOR 1
+#endif
+
+/* Return with an error when an invalid argument is passed. */
+#ifndef SLJIT_ARGUMENT_CHECKS
+/* Disabled by default */
+#define SLJIT_ARGUMENT_CHECKS 0
+#endif
+
+/* Debug checks (assertions, etc.). */
+#ifndef SLJIT_DEBUG
+/* Enabled by default */
+#define SLJIT_DEBUG 1
+#endif
+
+/* Verbose operations. */
+#ifndef SLJIT_VERBOSE
+/* Enabled by default */
+#define SLJIT_VERBOSE 1
+#endif
+
+/*
+  SLJIT_IS_FPU_AVAILABLE
+    The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE.
+      zero value - FPU is NOT present.
+      nonzero value - FPU is present.
+*/
+
+/* For further configurations, see the beginning of sljitConfigInternal.h */
+
+#endif
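
Since every option in this header is wrapped in #ifndef, an embedding application can pre-define the macros before the sljit headers are included. A hypothetical embedder setup, relying only on the defaults shown above; my_pool_alloc / my_pool_free are assumed helpers, not part of sljit:

/* Hypothetical embedder configuration: claim SLJIT_STD_MACROS_DEFINED
   so sljit pulls in no libc headers itself, then supply the required
   macros (SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, NULL) here. */
#include <stddef.h>     /* NULL */
#include <string.h>     /* memmove, memset */

extern void *my_pool_alloc(size_t size);   /* assumed allocator */
extern void my_pool_free(void *ptr);       /* assumed allocator */

#define SLJIT_STD_MACROS_DEFINED 1
#define SLJIT_MALLOC(size, allocator_data) my_pool_alloc(size)
#define SLJIT_FREE(ptr, allocator_data)    my_pool_free(ptr)
#define SLJIT_MEMMOVE(dest, src, len)      memmove(dest, src, len)
#define SLJIT_DEBUG 0      /* release build: no assertions */
#define SLJIT_VERBOSE 0

#include "sljitLir.h"
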
diff --git a/ext/pcre/pcrelib/sljit/sljitConfigInternal.h b/ext/pcre/pcrelib/sljit/sljitConfigInternal.h
new file mode 100644 (file)
index 0000000..3284012
--- /dev/null
@@ -0,0 +1,702 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_CONFIG_INTERNAL_H_
+#define _SLJIT_CONFIG_INTERNAL_H_
+
+/*
+   SLJIT defines the following architecture dependent types and macros:
+
+   Types:
+     sljit_sb, sljit_ub : signed and unsigned 8 bit byte
+     sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type
+     sljit_si, sljit_ui : signed and unsigned 32 bit integer type
+     sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer
+     sljit_p : unsigned pointer value (usually the same as sljit_uw, but
+               some 64 bit ABIs may use 32 bit pointers)
+     sljit_s : single precision floating point value
+     sljit_d : double precision floating point value
+
+   Macros for feature detection (boolean):
+     SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
+     SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
+     SLJIT_LITTLE_ENDIAN : little endian architecture
+     SLJIT_BIG_ENDIAN : big endian architecture
+     SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
+     SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
+
+   Constants:
+     SLJIT_NUMBER_OF_REGISTERS : number of available registers
+     SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers
+     SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers
+     SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
+     SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
+     SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
+     SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
+     SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing
+                          a double precision floating point array by index
+     SLJIT_SINGLE_SHIFT : the shift required to apply when accessing
+                          a single precision floating point array by index
+     SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET)
+     SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
+
+   Other macros:
+     SLJIT_CALL : C calling convention define for both calling JIT from C and C callbacks for JIT
+     SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
+*/
+
+/*****************/
+/* Sanity check. */
+/*****************/
+
+#if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+       || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       || (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
+       || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+       || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+       || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+       || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+       || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+       || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+       || (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
+       || (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
+       || (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED))
+#error "An architecture must be selected"
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+       + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \
+       + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+       + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+       + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+       + (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
+       + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+       + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+       + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+       + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
+       + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
+#error "Multiple architectures are selected"
+#endif
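
A side note on the (defined X && X) idiom used throughout these checks: it treats a macro defined to 0 as unselected, so an embedder can opt out explicitly, and it makes each term of the sum above contribute exactly 0 or 1. A tiny illustration with hypothetical values:

/* With the (defined X && X) idiom, defining a macro to 0 opts out: */
#define SLJIT_CONFIG_X86_32 0   /* defined, but counts as NOT selected */
#define SLJIT_CONFIG_X86_64 1   /* selected */
/* In the sum above this contributes 0 + 1, so only one architecture is
   counted and the "Multiple architectures" #error does not fire. */
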
+
+/********************************************************/
+/* Automatic CPU detection (requires compiler support). */
+/********************************************************/
+
+#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
+
+#ifndef _WIN32
+
+#if defined(__i386__) || defined(__i386)
+#define SLJIT_CONFIG_X86_32 1
+#elif defined(__x86_64__)
+#define SLJIT_CONFIG_X86_64 1
+#elif defined(__arm__) || defined(__ARM__)
+#ifdef __thumb2__
+#define SLJIT_CONFIG_ARM_THUMB2 1
+#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__)
+#define SLJIT_CONFIG_ARM_V7 1
+#else
+#define SLJIT_CONFIG_ARM_V5 1
+#endif
+#elif defined (__aarch64__)
+#define SLJIT_CONFIG_ARM_64 1
+#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__))
+#define SLJIT_CONFIG_PPC_64 1
+#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
+#define SLJIT_CONFIG_PPC_32 1
+#elif defined(__mips__) && !defined(_LP64)
+#define SLJIT_CONFIG_MIPS_32 1
+#elif defined(__mips64)
+#define SLJIT_CONFIG_MIPS_64 1
+#elif defined(__sparc__) || defined(__sparc)
+#define SLJIT_CONFIG_SPARC_32 1
+#elif defined(__tilegx__)
+#define SLJIT_CONFIG_TILEGX 1
+#else
+/* Unsupported architecture */
+#define SLJIT_CONFIG_UNSUPPORTED 1
+#endif
+
+#else /* !_WIN32 */
+
+#if defined(_M_X64) || defined(__x86_64__)
+#define SLJIT_CONFIG_X86_64 1
+#elif defined(_ARM_)
+#define SLJIT_CONFIG_ARM_V5 1
+#else
+#define SLJIT_CONFIG_X86_32 1
+#endif
+
+#endif /* !_WIN32 */
+#endif /* SLJIT_CONFIG_AUTO */
+
+#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+#undef SLJIT_EXECUTABLE_ALLOCATOR
+#endif
+
+/******************************/
+/* CPU family type detection. */
+/******************************/
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+       || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#define SLJIT_CONFIG_ARM_32 1
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#define SLJIT_CONFIG_X86 1
+#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#define SLJIT_CONFIG_ARM 1
+#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_CONFIG_PPC 1
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#define SLJIT_CONFIG_MIPS 1
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+#define SLJIT_CONFIG_SPARC 1
+#endif
+
+/**********************************/
+/* External function definitions. */
+/**********************************/
+
+#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
+
+/* These libraries are needed for the macros below. */
+#include <stdlib.h>
+#include <string.h>
+
+#endif /* SLJIT_STD_MACROS_DEFINED */
+
+/* General macros:
+   Note: SLJIT is designed to be as independent from them as possible.
+
+   In release mode (SLJIT_DEBUG is not defined) only the following
+   external functions are needed:
+*/
+
+#ifndef SLJIT_MALLOC
+#define SLJIT_MALLOC(size, allocator_data) malloc(size)
+#endif
+
+#ifndef SLJIT_FREE
+#define SLJIT_FREE(ptr, allocator_data) free(ptr)
+#endif
+
+#ifndef SLJIT_MEMMOVE
+#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
+#endif
+
+#ifndef SLJIT_ZEROMEM
+#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
+#endif
+
+/***************************/
+/* Compiler helper macros. */
+/***************************/
+
+#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY)
+
+#if defined(__GNUC__) && (__GNUC__ >= 3)
+#define SLJIT_LIKELY(x)                __builtin_expect((x), 1)
+#define SLJIT_UNLIKELY(x)      __builtin_expect((x), 0)
+#else
+#define SLJIT_LIKELY(x)                (x)
+#define SLJIT_UNLIKELY(x)      (x)
+#endif
+
+#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */
+
+#ifndef SLJIT_INLINE
+/* Inline functions. Some old compilers do not support them. */
+#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510
+#define SLJIT_INLINE
+#else
+#define SLJIT_INLINE __inline
+#endif
+#endif /* !SLJIT_INLINE */
+
+#ifndef SLJIT_NOINLINE
+/* Not inline functions. */
+#if defined(__GNUC__)
+#define SLJIT_NOINLINE __attribute__ ((noinline))
+#else
+#define SLJIT_NOINLINE
+#endif
+#endif /* !SLJIT_NOINLINE */
+
+#ifndef SLJIT_CONST
+/* Const variables. */
+#define SLJIT_CONST const
+#endif
+
+#ifndef SLJIT_UNUSED_ARG
+/* Unused arguments. */
+#define SLJIT_UNUSED_ARG(arg) (void)arg
+#endif
+
+/*********************************/
+/* Type of public API functions. */
+/*********************************/
+
+#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC)
+/* Static ABI functions. For all-in-one programs. */
+
+#if defined(__GNUC__)
+/* Disable unused warnings in gcc. */
+#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused))
+#else
+#define SLJIT_API_FUNC_ATTRIBUTE static
+#endif
+
+#else
+#define SLJIT_API_FUNC_ATTRIBUTE
+#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */
+
+/****************************/
+/* Instruction cache flush. */
+/****************************/
+
+#ifndef SLJIT_CACHE_FLUSH
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
+/* Not required to implement on archs with unified caches. */
+#define SLJIT_CACHE_FLUSH(from, to)
+
+#elif defined __APPLE__
+
+/* Supported by all Macs since Mac OS 10.5.
+   However, it does not work on non-jailbroken iOS devices,
+   although the compilation is successful. */
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+       sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
+
+#elif defined __ANDROID__
+
+/* Android lacks __clear_cache; instead, cacheflush should be used. */
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+    cacheflush((long)(from), (long)(to), 0)
+
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+
+/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
+#define SLJIT_CACHE_FLUSH(from, to) \
+       ppc_cache_flush((from), (to))
+
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+
+/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
+#define SLJIT_CACHE_FLUSH(from, to) \
+       sparc_cache_flush((from), (to))
+
+#else
+
+/* Calls __ARM_NR_cacheflush on ARM-Linux. */
+#define SLJIT_CACHE_FLUSH(from, to) \
+       __clear_cache((char*)(from), (char*)(to))
+
+#endif
+
+#endif /* !SLJIT_CACHE_FLUSH */
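
Whichever branch is selected above, the macro is used the same way: after machine code has been written through a data pointer, flush exactly that byte range before jumping to it. A hedged sketch of the call pattern; emit_code() is an assumed helper, error handling is elided, and SLJIT_MALLOC_EXEC / SLJIT_FREE_EXEC are the allocator entry points declared later in this header:

/* Sketch: flushing the instruction cache over freshly written code.
   emit_code() is hypothetical and returns the number of bytes written;
   on x86 the flush expands to nothing (unified caches). */
static void run_generated(sljit_uw max_size)
{
  sljit_ub *buf = (sljit_ub *)SLJIT_MALLOC_EXEC(max_size);
  sljit_uw written = emit_code(buf, max_size);
  void (*entry)(void);

  SLJIT_CACHE_FLUSH(buf, buf + written);
  entry = (void (*)(void))(sljit_uw)buf;   /* safe to execute only now */
  entry();
  SLJIT_FREE_EXEC(buf);
}
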
+
+/******************************************************/
+/* Byte/half/int/word/single/double type definitions. */
+/******************************************************/
+
+/* 8 bit byte type. */
+typedef unsigned char sljit_ub;
+typedef signed char sljit_sb;
+
+/* 16 bit half-word type. */
+typedef unsigned short int sljit_uh;
+typedef signed short int sljit_sh;
+
+/* 32 bit integer type. */
+typedef unsigned int sljit_ui;
+typedef signed int sljit_si;
+
+/* Machine word type. Enough for storing a pointer.
+     32 bit for 32 bit machines.
+     64 bit for 64 bit machines. */
+#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+/* Just to have something. */
+#define SLJIT_WORD_SHIFT 0
+typedef unsigned long int sljit_uw;
+typedef long int sljit_sw;
+#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+       && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+       && !(defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+#define SLJIT_32BIT_ARCHITECTURE 1
+#define SLJIT_WORD_SHIFT 2
+typedef unsigned int sljit_uw;
+typedef int sljit_sw;
+#else
+#define SLJIT_64BIT_ARCHITECTURE 1
+#define SLJIT_WORD_SHIFT 3
+#ifdef _WIN32
+typedef unsigned __int64 sljit_uw;
+typedef __int64 sljit_sw;
+#else
+typedef unsigned long int sljit_uw;
+typedef long int sljit_sw;
+#endif
+#endif
+
+typedef sljit_uw sljit_p;
+
+/* Floating point types. */
+typedef float sljit_s;
+typedef double sljit_d;
+
+/* Shift for pointer sized data. */
+#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT
+
+/* Shift for double precision sized data. */
+#define SLJIT_DOUBLE_SHIFT 3
+#define SLJIT_SINGLE_SHIFT 2
+
+#ifndef SLJIT_W
+
+/* Defining long constants. */
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#define SLJIT_W(w)     (w##ll)
+#else
+#define SLJIT_W(w)     (w)
+#endif
+
+#endif /* !SLJIT_W */
+
+/*************************/
+/* Endianness detection. */
+/*************************/
+
+#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN)
+
+/* These macros are mostly useful for applications. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+
+#ifdef __LITTLE_ENDIAN__
+#define SLJIT_LITTLE_ENDIAN 1
+#else
+#define SLJIT_BIG_ENDIAN 1
+#endif
+
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+       || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+
+#ifdef __MIPSEL__
+#define SLJIT_LITTLE_ENDIAN 1
+#else
+#define SLJIT_BIG_ENDIAN 1
+#endif
+
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+
+#define SLJIT_BIG_ENDIAN 1
+
+#else
+#define SLJIT_LITTLE_ENDIAN 1
+#endif
+
+#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */
+
+/* Sanity check. */
+#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#error "Exactly one endianness must be selected"
+#endif
+
+#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#error "Exactly one endianness must be selected"
+#endif
+
+#ifndef SLJIT_UNALIGNED
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+       || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
+       || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+       || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+       || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+       || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_UNALIGNED 1
+#endif
+
+#endif /* !SLJIT_UNALIGNED */
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+/* Auto detect SSE2 support using CPUID.
+   On 64 bit x86 CPUs, SSE2 must be present. */
+#define SLJIT_DETECT_SSE2 1
+#endif
+
+/*****************************************************************************************/
+/* Calling convention of functions generated by SLJIT or called from the generated code. */
+/*****************************************************************************************/
+
+#ifndef SLJIT_CALL
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
+#if defined(__GNUC__) && !defined(__APPLE__)
+
+#define SLJIT_CALL __attribute__ ((fastcall))
+#define SLJIT_X86_32_FASTCALL 1
+
+#elif defined(_MSC_VER)
+
+#define SLJIT_CALL __fastcall
+#define SLJIT_X86_32_FASTCALL 1
+
+#elif defined(__BORLANDC__)
+
+#define SLJIT_CALL __msfastcall
+#define SLJIT_X86_32_FASTCALL 1
+
+#else /* Unknown compiler. */
+
+/* The cdecl attribute is the default. */
+#define SLJIT_CALL
+
+#endif
+
+#else /* Non x86-32 architectures. */
+
+#define SLJIT_CALL
+
+#endif /* SLJIT_CONFIG_X86_32 */
+
+#endif /* !SLJIT_CALL */
+
+#ifndef SLJIT_INDIRECT_CALL
+#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \
+       || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
+/* It seems certain PPC compilers use indirect addressing for functions,
+   which makes things complicated. */
+#define SLJIT_INDIRECT_CALL 1
+#endif
+#endif /* SLJIT_INDIRECT_CALL */
+
+/* The offset which needs to be subtracted from the return address to
+determine the next executed instruction after return. */
+#ifndef SLJIT_RETURN_ADDRESS_OFFSET
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define SLJIT_RETURN_ADDRESS_OFFSET 8
+#else
+#define SLJIT_RETURN_ADDRESS_OFFSET 0
+#endif
+#endif /* SLJIT_RETURN_ADDRESS_OFFSET */
+
+/***************************************************/
+/* Functions of the built-in executable allocator. */
+/***************************************************/
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
+#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
+#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)
+#endif
+
+/**********************************************/
+/* Registers and locals offset determination. */
+/**********************************************/
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
+#define SLJIT_NUMBER_OF_REGISTERS 10
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw))
+#else
+/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */
+#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw))
+#endif /* SLJIT_X86_32_FASTCALL */
+
+#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+#ifndef _WIN64
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
+#define SLJIT_LOCALS_OFFSET_BASE (sizeof(sljit_sw))
+#else
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#define SLJIT_LOCALS_OFFSET_BASE ((4 + 2) * sizeof(sljit_sw))
+#endif /* _WIN64 */
+
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+
+#define SLJIT_NUMBER_OF_REGISTERS 11
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#define SLJIT_LOCALS_OFFSET_BASE 0
+
+#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+
+#define SLJIT_NUMBER_OF_REGISTERS 11
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#define SLJIT_LOCALS_OFFSET_BASE 0
+
+#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+
+#define SLJIT_NUMBER_OF_REGISTERS 25
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
+#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
+
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+
+#define SLJIT_NUMBER_OF_REGISTERS 22
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
+#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw))
+#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+/* Add +1 for double alignment. */
+#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw))
+#else
+#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw))
+#endif /* SLJIT_CONFIG_PPC_64 || _AIX */
+
+#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+
+#define SLJIT_NUMBER_OF_REGISTERS 17
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw))
+#else
+#define SLJIT_LOCALS_OFFSET_BASE 0
+#endif
+
+#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
+
+#define SLJIT_NUMBER_OF_REGISTERS 18
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+/* Add +1 for double alignment. */
+#define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw))
+#endif
+
+#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+
+#define SLJIT_NUMBER_OF_REGISTERS 0
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
+#define SLJIT_LOCALS_OFFSET_BASE 0
+
+#endif
+
+#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE)
+
+#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \
+       (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS)
+
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64)
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1
+#else
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
+#endif
+
+#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
+       (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
+
+/*************************************/
+/* Debug and verbose related macros. */
+/*************************************/
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+#include <stdio.h>
+#endif
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+
+#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP)
+
+/* SLJIT_HALT_PROCESS must halt the process. */
+#ifndef SLJIT_HALT_PROCESS
+#include <stdlib.h>
+
+#define SLJIT_HALT_PROCESS() \
+       abort();
+#endif /* !SLJIT_HALT_PROCESS */
+
+#include <stdio.h>
+
+#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */
+
+/* Feel free to redefine these two macros. */
+#ifndef SLJIT_ASSERT
+
+#define SLJIT_ASSERT(x) \
+       do { \
+               if (SLJIT_UNLIKELY(!(x))) { \
+                       printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \
+                       SLJIT_HALT_PROCESS(); \
+               } \
+       } while (0)
+
+#endif /* !SLJIT_ASSERT */
+
+#ifndef SLJIT_ASSERT_STOP
+
+#define SLJIT_ASSERT_STOP() \
+       do { \
+               printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \
+               SLJIT_HALT_PROCESS(); \
+       } while (0)
+
+#endif /* !SLJIT_ASSERT_STOP */
+
+#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
+
+/* Forcing empty, but valid statements. */
+#undef SLJIT_ASSERT
+#undef SLJIT_ASSERT_STOP
+
+#define SLJIT_ASSERT(x) \
+       do { } while (0)
+#define SLJIT_ASSERT_STOP() \
+       do { } while (0)
+
+#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
+
+#ifndef SLJIT_COMPILE_ASSERT
+
+/* Should be improved eventually. */
+#define SLJIT_COMPILE_ASSERT(x, description) \
+       SLJIT_ASSERT(x)
+
+#endif /* !SLJIT_COMPILE_ASSERT */
+
+#endif
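
The *_SHIFT constants defined in this header let generated code turn an element index into a byte offset with a shift instead of a multiply. A standalone illustration of the same arithmetic in plain C, under the type definitions above; the helper name is illustrative:

/* Sketch: index -> byte offset via SLJIT_WORD_SHIFT; on supported
   configurations sizeof(sljit_sw) == (1 << SLJIT_WORD_SHIFT). */
static sljit_sw load_word(const sljit_sw *base, sljit_uw index)
{
  const sljit_ub *bytes = (const sljit_ub *)base;
  return *(const sljit_sw *)(bytes + (index << SLJIT_WORD_SHIFT));
}

/* SLJIT_W() plays a similar portability role for long constants, e.g.
   sljit_sw mask = SLJIT_W(0x00ffffffffffff);  -- 64 bit targets only. */
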
diff --git a/ext/pcre/pcrelib/sljit/sljitExecAllocator.c b/ext/pcre/pcrelib/sljit/sljitExecAllocator.c
new file mode 100644 (file)
index 0000000..f24ed33
--- /dev/null
@@ -0,0 +1,312 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+   This file contains a simple executable memory allocator
+
+   It is assumed that executable code blocks are usually medium (or sometimes
+   large) memory blocks, and the allocator is not too frequently called (less
+   optimized than other allocators). Thus, using it as a generic allocator is
+   not suggested.
+
+   How does it work:
+     Memory is allocated in contiguous memory areas called chunks by alloc_chunk()
+     Chunk format:
+     [ block ][ block ] ... [ block ][ block terminator ]
+
+   Every block, including the block terminator, starts with a block_header. The block
+   header contains the size of the previous and the next block. These sizes
+   can also contain special values.
+     Block size:
+       0 - The block is a free_block, with a different size member.
+       1 - The block is a block terminator.
+       n - The block is used at the moment, and the value contains its size.
+     Previous block size:
+       0 - This is the first block of the memory chunk.
+       n - The size of the previous block.
+
+   Using these size values we can go forward or backward on the block chain.
+   The unused blocks are stored in a linked list pointed to by free_blocks. This
+   list is useful if we need to find a suitable memory area when the allocator
+   is called.
+
+   When a block is freed, the new free block is connected to its adjacent free
+   blocks if possible. For example, if the memory looks like
+
+     [ free block ][ used block ][ free block ]
+
+   and "used block" is freed, the three blocks are connected together:
+     [           one big free block           ]
+*/
+
+/* --------------------------------------------------------------------- */
+/*  System (OS) functions                                                */
+/* --------------------------------------------------------------------- */
+
+/* 64 KByte. */
+#define CHUNK_SIZE     0x10000
+
+/*
+   alloc_chunk / free_chunk :
+     * allocate executable system memory chunks
+     * the size is always divisible by CHUNK_SIZE
+   allocator_grab_lock / allocator_release_lock :
+     * make the allocator thread safe
+     * can be empty if the OS (or the application) does not support threading
+     * only the allocator requires this lock; sljit itself is fully thread safe
+       as it only uses local variables
+*/
+
+#ifdef _WIN32
+
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{
+       return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+}
+
+static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+{
+       SLJIT_UNUSED_ARG(size);
+       VirtualFree(chunk, 0, MEM_RELEASE);
+}
+
+#else
+
+static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+{
+       void* retval;
+
+#ifdef MAP_ANON
+       retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+#else
+       if (dev_zero < 0) {
+               if (open_dev_zero())
+                       return NULL;
+       }
+       retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
+#endif
+
+       return (retval != MAP_FAILED) ? retval : NULL;
+}
+
+static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+{
+       munmap(chunk, size);
+}
+
+#endif
+
+/* --------------------------------------------------------------------- */
+/*  Common functions                                                     */
+/* --------------------------------------------------------------------- */
+
+#define CHUNK_MASK     (~(CHUNK_SIZE - 1))
+
+struct block_header {
+       sljit_uw size;
+       sljit_uw prev_size;
+};
+
+struct free_block {
+       struct block_header header;
+       struct free_block *next;
+       struct free_block *prev;
+       sljit_uw size;
+};
+
+#define AS_BLOCK_HEADER(base, offset) \
+       ((struct block_header*)(((sljit_ub*)base) + offset))
+#define AS_FREE_BLOCK(base, offset) \
+       ((struct free_block*)(((sljit_ub*)base) + offset))
+#define MEM_START(base)                ((void*)(((sljit_ub*)base) + sizeof(struct block_header)))
+#define ALIGN_SIZE(size)       (((size) + sizeof(struct block_header) + 7) & ~7)
+
+static struct free_block* free_blocks;
+static sljit_uw allocated_size;
+static sljit_uw total_size;
+
+static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
+{
+       free_block->header.size = 0;
+       free_block->size = size;
+
+       free_block->next = free_blocks;
+       free_block->prev = 0;
+       if (free_blocks)
+               free_blocks->prev = free_block;
+       free_blocks = free_block;
+}
+
+static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
+{
+       if (free_block->next)
+               free_block->next->prev = free_block->prev;
+
+       if (free_block->prev)
+               free_block->prev->next = free_block->next;
+       else {
+               SLJIT_ASSERT(free_blocks == free_block);
+               free_blocks = free_block->next;
+       }
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
+{
+       struct block_header *header;
+       struct block_header *next_header;
+       struct free_block *free_block;
+       sljit_uw chunk_size;
+
+       allocator_grab_lock();
+       if (size < sizeof(struct free_block))
+               size = sizeof(struct free_block);
+       size = ALIGN_SIZE(size);
+
+       free_block = free_blocks;
+       while (free_block) {
+               if (free_block->size >= size) {
+                       chunk_size = free_block->size;
+                       if (chunk_size > size + 64) {
+                               /* We just cut a block from the end of the free block. */
+                               chunk_size -= size;
+                               free_block->size = chunk_size;
+                               header = AS_BLOCK_HEADER(free_block, chunk_size);
+                               header->prev_size = chunk_size;
+                               AS_BLOCK_HEADER(header, size)->prev_size = size;
+                       }
+                       else {
+                               sljit_remove_free_block(free_block);
+                               header = (struct block_header*)free_block;
+                               size = chunk_size;
+                       }
+                       allocated_size += size;
+                       header->size = size;
+                       allocator_release_lock();
+                       return MEM_START(header);
+               }
+               free_block = free_block->next;
+       }
+
+       chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK;
+       header = (struct block_header*)alloc_chunk(chunk_size);
+       if (!header) {
+               allocator_release_lock();
+               return NULL;
+       }
+
+       chunk_size -= sizeof(struct block_header);
+       total_size += chunk_size;
+
+       header->prev_size = 0;
+       if (chunk_size > size + 64) {
+               /* Cut the allocated space into a free and a used block. */
+               allocated_size += size;
+               header->size = size;
+               chunk_size -= size;
+
+               free_block = AS_FREE_BLOCK(header, size);
+               free_block->header.prev_size = size;
+               sljit_insert_free_block(free_block, chunk_size);
+               next_header = AS_BLOCK_HEADER(free_block, chunk_size);
+       }
+       else {
+               /* All space belongs to this allocation. */
+               allocated_size += chunk_size;
+               header->size = chunk_size;
+               next_header = AS_BLOCK_HEADER(header, chunk_size);
+       }
+       next_header->size = 1;
+       next_header->prev_size = chunk_size;
+       allocator_release_lock();
+       return MEM_START(header);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+{
+       struct block_header *header;
+       struct free_block* free_block;
+
+       allocator_grab_lock();
+       header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
+       allocated_size -= header->size;
+
+       /* Connecting free blocks together if possible. */
+
+       /* If header->prev_size == 0, free_block will be equal to header.
+          In this case, free_block->header.size will be > 0. */
+       free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
+       if (SLJIT_UNLIKELY(!free_block->header.size)) {
+               free_block->size += header->size;
+               header = AS_BLOCK_HEADER(free_block, free_block->size);
+               header->prev_size = free_block->size;
+       }
+       else {
+               free_block = (struct free_block*)header;
+               sljit_insert_free_block(free_block, header->size);
+       }
+
+       header = AS_BLOCK_HEADER(free_block, free_block->size);
+       if (SLJIT_UNLIKELY(!header->size)) {
+               free_block->size += ((struct free_block*)header)->size;
+               sljit_remove_free_block((struct free_block*)header);
+               header = AS_BLOCK_HEADER(free_block, free_block->size);
+               header->prev_size = free_block->size;
+       }
+
+       /* The whole chunk is free. */
+       if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
+               /* If this block is freed, we still have (allocated_size / 2) free space. */
+               if (total_size - free_block->size > (allocated_size * 3 / 2)) {
+                       total_size -= free_block->size;
+                       sljit_remove_free_block(free_block);
+                       free_chunk(free_block, free_block->size + sizeof(struct block_header));
+               }
+       }
+
+       allocator_release_lock();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+{
+       struct free_block* free_block;
+       struct free_block* next_free_block;
+
+       allocator_grab_lock();
+
+       free_block = free_blocks;
+       while (free_block) {
+               next_free_block = free_block->next;
+               if (!free_block->header.prev_size && 
+                               AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
+                       total_size -= free_block->size;
+                       sljit_remove_free_block(free_block);
+                       free_chunk(free_block, free_block->size + sizeof(struct block_header));
+               }
+               free_block = next_free_block;
+       }
+
+       SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
+       allocator_release_lock();
+}
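
The size bookkeeping in this allocator is easiest to check with concrete numbers. A standalone sketch of the two rounding steps; the 16-byte header size is an assumption for a typical 64 bit build (struct block_header is two sljit_uw fields):

/* Worked example of the allocator's rounding. HDR stands in for
   sizeof(struct block_header); all values are illustrative. */
#include <stdio.h>

#define CHUNK_SIZE 0x10000                 /* 64 KByte, as above */
#define CHUNK_MASK (~(CHUNK_SIZE - 1))
#define HDR 16
#define ALIGN_SIZE(size) (((size) + HDR + 7) & ~7)

int main(void)
{
  unsigned long request = 100;
  unsigned long block = ALIGN_SIZE(request);   /* (100+16+7) & ~7 = 120 */
  /* sljit_malloc_exec: room for the block plus one terminator header,
     rounded up to a whole chunk. */
  unsigned long chunk = (block + HDR + CHUNK_SIZE - 1) & CHUNK_MASK;
  printf("block=%lu chunk=0x%lx\n", block, chunk);  /* 120, 0x10000 */
  return 0;
}
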
diff --git a/ext/pcre/pcrelib/sljit/sljitLir.c b/ext/pcre/pcrelib/sljit/sljitLir.c
new file mode 100644 (file)
index 0000000..5039a7e
--- /dev/null
@@ -0,0 +1,2025 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "sljitLir.h"
+
+#define CHECK_ERROR() \
+       do { \
+               if (SLJIT_UNLIKELY(compiler->error)) \
+                       return compiler->error; \
+       } while (0)
+
+#define CHECK_ERROR_PTR() \
+       do { \
+               if (SLJIT_UNLIKELY(compiler->error)) \
+                       return NULL; \
+       } while (0)
+
+#define FAIL_IF(expr) \
+       do { \
+               if (SLJIT_UNLIKELY(expr)) \
+                       return compiler->error; \
+       } while (0)
+
+#define PTR_FAIL_IF(expr) \
+       do { \
+               if (SLJIT_UNLIKELY(expr)) \
+                       return NULL; \
+       } while (0)
+
+#define FAIL_IF_NULL(ptr) \
+       do { \
+               if (SLJIT_UNLIKELY(!(ptr))) { \
+                       compiler->error = SLJIT_ERR_ALLOC_FAILED; \
+                       return SLJIT_ERR_ALLOC_FAILED; \
+               } \
+       } while (0)
+
+#define PTR_FAIL_IF_NULL(ptr) \
+       do { \
+               if (SLJIT_UNLIKELY(!(ptr))) { \
+                       compiler->error = SLJIT_ERR_ALLOC_FAILED; \
+                       return NULL; \
+               } \
+       } while (0)
+
+#define PTR_FAIL_WITH_EXEC_IF(ptr) \
+       do { \
+               if (SLJIT_UNLIKELY(!(ptr))) { \
+                       compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \
+                       return NULL; \
+               } \
+       } while (0)
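
These macros give every code-generation function the same early-exit shape. A hypothetical emitter shows the intended usage; push_inst() stands in for the per-architecture instruction writers and is not defined here:

/* Sketch: the FAIL_IF family inside an emitter. Any earlier error makes
   CHECK_ERROR() return immediately; FAIL_IF propagates new failures. */
static sljit_si emit_example(struct sljit_compiler *compiler)
{
  CHECK_ERROR();                        /* already failed? bail out */
  FAIL_IF(push_inst(compiler, 0x90));   /* assumed per-arch helper */
  return SLJIT_SUCCESS;
}
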
+
+#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+
+#define GET_OPCODE(op) \
+       ((op) & ~(SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+
+#define GET_FLAGS(op) \
+       ((op) & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))
+
+#define GET_ALL_FLAGS(op) \
+       ((op) & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+
+#define TYPE_CAST_NEEDED(op) \
+       (((op) >= SLJIT_MOV_UB && (op) <= SLJIT_MOV_SH) || ((op) >= SLJIT_MOVU_UB && (op) <= SLJIT_MOVU_SH))
+
+#define BUF_SIZE       4096
+
+#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
+#define ABUF_SIZE      2048
+#else
+#define ABUF_SIZE      4096
+#endif
+
+/* Parameter parsing. */
+#define REG_MASK               0x3f
+#define OFFS_REG(reg)          (((reg) >> 8) & REG_MASK)
+#define OFFS_REG_MASK          (REG_MASK << 8)
+#define TO_OFFS_REG(reg)       ((reg) << 8)
+/* When reg cannot be unused. */
+#define FAST_IS_REG(reg)       ((reg) <= REG_MASK)
+/* When reg can be unused. */
+#define SLOW_IS_REG(reg)       ((reg) > 0 && (reg) <= REG_MASK)
+
+/* Jump flags. */
+#define JUMP_LABEL     0x1
+#define JUMP_ADDR      0x2
+/* SLJIT_REWRITABLE_JUMP is 0x1000. */
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#      define PATCH_MB 0x4
+#      define PATCH_MW 0x8
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#      define PATCH_MD 0x10
+#endif
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#      define IS_BL            0x4
+#      define PATCH_B          0x8
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#      define CPOOL_SIZE       512
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#      define IS_COND          0x04
+#      define IS_BL            0x08
+       /* conditional + imm8 */
+#      define PATCH_TYPE1      0x10
+       /* conditional + imm20 */
+#      define PATCH_TYPE2      0x20
+       /* IT + imm24 */
+#      define PATCH_TYPE3      0x30
+       /* imm11 */
+#      define PATCH_TYPE4      0x40
+       /* imm24 */
+#      define PATCH_TYPE5      0x50
+       /* BL + imm24 */
+#      define PATCH_BL         0x60
+       /* 0xf00 cc code for branches */
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#      define IS_COND          0x004
+#      define IS_CBZ           0x008
+#      define IS_BL            0x010
+#      define PATCH_B          0x020
+#      define PATCH_COND       0x040
+#      define PATCH_ABS48      0x080
+#      define PATCH_ABS64      0x100
+#endif
+
+#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+#      define IS_COND          0x004
+#      define IS_CALL          0x008
+#      define PATCH_B          0x010
+#      define PATCH_ABS_B      0x020
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#      define PATCH_ABS32      0x040
+#      define PATCH_ABS48      0x080
+#endif
+#      define REMOVE_COND      0x100
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+#      define IS_MOVABLE       0x004
+#      define IS_JAL           0x008
+#      define IS_CALL          0x010
+#      define IS_BIT26_COND    0x020
+#      define IS_BIT16_COND    0x040
+
+#      define IS_COND          (IS_BIT26_COND | IS_BIT16_COND)
+
+#      define PATCH_B          0x080
+#      define PATCH_J          0x100
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#      define PATCH_ABS32      0x200
+#      define PATCH_ABS48      0x400
+#endif
+
+       /* instruction types */
+#      define MOVABLE_INS      0
+       /* 1 - 31 last destination register */
+       /* no destination (i.e: store) */
+#      define UNMOVABLE_INS    32
+       /* FPU status register */
+#      define FCSR_FCC         33
+#endif
+
+#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+#      define IS_JAL           0x04
+#      define IS_COND          0x08
+
+#      define PATCH_B          0x10
+#      define PATCH_J          0x20
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#      define IS_MOVABLE       0x04
+#      define IS_COND          0x08
+#      define IS_CALL          0x10
+
+#      define PATCH_B          0x20
+#      define PATCH_CALL       0x40
+
+       /* instruction types */
+#      define MOVABLE_INS      0
+       /* 1 - 31 last destination register */
+       /* no destination (i.e: store) */
+#      define UNMOVABLE_INS    32
+
+#      define DST_INS_MASK     0xff
+
+       /* ICC_SET is the same as SET_FLAGS. */
+#      define ICC_IS_SET       (1 << 23)
+#      define FCC_IS_SET       (1 << 24)
+#endif
+
+/* Stack management. */
+
+#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \
+       (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \
+               (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \
+               extra) * sizeof(sljit_sw))
+
+#define ADJUST_LOCAL_OFFSET(p, i) \
+       if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
+               (i) += SLJIT_LOCALS_OFFSET;
+
+#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */
+
+/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */
+#include "sljitUtils.c"
+
+#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+#include "sljitExecAllocator.c"
+#endif
+
+/* Argument checking features. */
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+
+/* Returns with an error when an invalid argument is passed. */
+
+#define CHECK_ARGUMENT(x) \
+       do { \
+               if (SLJIT_UNLIKELY(!(x))) \
+                       return 1; \
+       } while (0)
+
+#define CHECK_RETURN_TYPE sljit_si
+#define CHECK_RETURN_OK return 0
+
+#define CHECK(x) \
+       do { \
+               if (SLJIT_UNLIKELY(x)) { \
+                       compiler->error = SLJIT_ERR_BAD_ARGUMENT; \
+                       return SLJIT_ERR_BAD_ARGUMENT; \
+               } \
+       } while (0)
+
+#define CHECK_PTR(x) \
+       do { \
+               if (SLJIT_UNLIKELY(x)) { \
+                       compiler->error = SLJIT_ERR_BAD_ARGUMENT; \
+                       return NULL; \
+               } \
+       } while (0)
+
+#define CHECK_REG_INDEX(x) \
+       do { \
+               if (SLJIT_UNLIKELY(x)) { \
+                       return -2; \
+               } \
+       } while (0)
+
+#elif (defined SLJIT_DEBUG && SLJIT_DEBUG)
+
+/* An assertion failure occurs if an invalid argument is passed. */
+#undef SLJIT_ARGUMENT_CHECKS
+#define SLJIT_ARGUMENT_CHECKS 1
+
+#define CHECK_ARGUMENT(x) SLJIT_ASSERT(x)
+#define CHECK_RETURN_TYPE void
+#define CHECK_RETURN_OK return
+#define CHECK(x) x
+#define CHECK_PTR(x) x
+#define CHECK_REG_INDEX(x) x
+
+#elif (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+
+/* Arguments are not checked. */
+#define CHECK_RETURN_TYPE void
+#define CHECK_RETURN_OK return
+#define CHECK(x) x
+#define CHECK_PTR(x) x
+#define CHECK_REG_INDEX(x) x
+
+#else
+
+/* Arguments are not checked. */
+#define CHECK(x)
+#define CHECK_PTR(x)
+#define CHECK_REG_INDEX(x)
+
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
+/* --------------------------------------------------------------------- */
+/*  Public functions                                                     */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#define SLJIT_NEEDS_COMPILER_INIT 1
+static sljit_si compiler_initialized = 0;
+/* A thread safe initialization. */
+static void init_compiler(void);
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data)
+{
+       struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler), allocator_data);
+       if (!compiler)
+               return NULL;
+       SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler));
+
+       SLJIT_COMPILE_ASSERT(
+               sizeof(sljit_sb) == 1 && sizeof(sljit_ub) == 1
+               && sizeof(sljit_sh) == 2 && sizeof(sljit_uh) == 2
+               && sizeof(sljit_si) == 4 && sizeof(sljit_ui) == 4
+               && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8)
+               && sizeof(sljit_p) <= sizeof(sljit_sw)
+               && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8)
+               && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8),
+               invalid_integer_types);
+       SLJIT_COMPILE_ASSERT(SLJIT_INT_OP == SLJIT_SINGLE_OP,
+               int_op_and_single_op_must_be_the_same);
+       SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_SINGLE_OP,
+               rewritable_jump_and_single_op_must_not_be_the_same);
+
+       /* Only the non-zero members must be set. */
+       compiler->error = SLJIT_SUCCESS;
+
+       compiler->allocator_data = allocator_data;
+       compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data);
+       compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, allocator_data);
+
+       if (!compiler->buf || !compiler->abuf) {
+               if (compiler->buf)
+                       SLJIT_FREE(compiler->buf, allocator_data);
+               if (compiler->abuf)
+                       SLJIT_FREE(compiler->abuf, allocator_data);
+               SLJIT_FREE(compiler, allocator_data);
+               return NULL;
+       }
+
+       compiler->buf->next = NULL;
+       compiler->buf->used_size = 0;
+       compiler->abuf->next = NULL;
+       compiler->abuf->used_size = 0;
+
+       compiler->scratches = -1;
+       compiler->saveds = -1;
+       compiler->fscratches = -1;
+       compiler->fsaveds = -1;
+       compiler->local_size = -1;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       compiler->args = -1;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw)
+               + CPOOL_SIZE * sizeof(sljit_ub), allocator_data);
+       if (!compiler->cpool) {
+               SLJIT_FREE(compiler->buf, allocator_data);
+               SLJIT_FREE(compiler->abuf, allocator_data);
+               SLJIT_FREE(compiler, allocator_data);
+               return NULL;
+       }
+       compiler->cpool_unique = (sljit_ub*)(compiler->cpool + CPOOL_SIZE);
+       compiler->cpool_diff = 0xffffffff;
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+       compiler->delay_slot = UNMOVABLE_INS;
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       compiler->delay_slot = UNMOVABLE_INS;
+#endif
+
+#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT)
+       if (!compiler_initialized) {
+               init_compiler();
+               compiler_initialized = 1;
+       }
+#endif
+
+       return compiler;
+}
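+
+/* Usage sketch (illustrative, not part of the upstream source), assuming
+   the public API declared in sljitLir.h:
+
+       struct sljit_compiler *c = sljit_create_compiler(NULL);
+       if (c) {
+               ... emit instructions, then sljit_generate_code(c) ...
+               sljit_free_compiler(c);
+       }
+
+   sljit_free_compiler releases only the compiler itself; generated code
+   lives on until sljit_free_code is called on it. */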
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       struct sljit_memory_fragment *curr;
+       void *allocator_data = compiler->allocator_data;
+       SLJIT_UNUSED_ARG(allocator_data);
+
+       buf = compiler->buf;
+       while (buf) {
+               curr = buf;
+               buf = buf->next;
+               SLJIT_FREE(curr, allocator_data);
+       }
+
+       buf = compiler->abuf;
+       while (buf) {
+               curr = buf;
+               buf = buf->next;
+               SLJIT_FREE(curr, allocator_data);
+       }
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       SLJIT_FREE(compiler->cpool, allocator_data);
+#endif
+       SLJIT_FREE(compiler, allocator_data);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler)
+{
+       if (compiler->error == SLJIT_SUCCESS)
+               compiler->error = SLJIT_ERR_ALLOC_FAILED;
+}
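+
+/* Note (illustrative): this hook lets an embedder's allocator mark the
+   compiler as failed; subsequent emit calls then bail out early through
+   the CHECK_ERROR machinery and code generation reports the failure. */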
+
+#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
+{
+       /* Remove the Thumb mode flag. */
+       SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1));
+}
+#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
+{
+       /* Resolve indirection. */
+       code = (void*)(*(sljit_uw*)code);
+       SLJIT_FREE_EXEC(code);
+}
+#else
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
+{
+       SLJIT_FREE_EXEC(code);
+}
+#endif
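+
+/* Background note (illustrative): on ARM Thumb-2, code pointers carry
+   bit 0 set to select Thumb state, hence the mask above; with
+   SLJIT_INDIRECT_CALL ABIs (e.g. classic PPC64 function descriptors)
+   the visible pointer is a descriptor whose first word holds the real
+   entry address, hence the extra dereference. */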
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label)
+{
+       if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) {
+               jump->flags &= ~JUMP_ADDR;
+               jump->flags |= JUMP_LABEL;
+               jump->u.label = label;
+       }
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target)
+{
+       if (SLJIT_LIKELY(!!jump)) {
+               jump->flags &= ~JUMP_LABEL;
+               jump->flags |= JUMP_ADDR;
+               jump->u.target = target;
+       }
+}
+
+/* --------------------------------------------------------------------- */
+/*  Private functions                                                    */
+/* --------------------------------------------------------------------- */
+
+static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size)
+{
+       sljit_ub *ret;
+       struct sljit_memory_fragment *new_frag;
+
+       SLJIT_ASSERT(size <= 256);
+       if (compiler->buf->used_size + size <= (BUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) {
+               ret = compiler->buf->memory + compiler->buf->used_size;
+               compiler->buf->used_size += size;
+               return ret;
+       }
+       new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, compiler->allocator_data);
+       PTR_FAIL_IF_NULL(new_frag);
+       new_frag->next = compiler->buf;
+       compiler->buf = new_frag;
+       new_frag->used_size = size;
+       return new_frag->memory;
+}
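+
+/* Note (illustrative): the compiler uses a simple bump allocator. A
+   request advances used_size within the head fragment; one that does not
+   fit opens a fresh BUF_SIZE fragment at the list head instead of
+   splitting, so every returned pointer is contiguous. ensure_abuf below
+   applies the same scheme to the ABUF_SIZE list. */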
+
+static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size)
+{
+       sljit_ub *ret;
+       struct sljit_memory_fragment *new_frag;
+
+       SLJIT_ASSERT(size <= 256);
+       if (compiler->abuf->used_size + size <= (ABUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) {
+               ret = compiler->abuf->memory + compiler->abuf->used_size;
+               compiler->abuf->used_size += size;
+               return ret;
+       }
+       new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, compiler->allocator_data);
+       PTR_FAIL_IF_NULL(new_frag);
+       new_frag->next = compiler->abuf;
+       compiler->abuf = new_frag;
+       new_frag->used_size = size;
+       return new_frag->memory;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size)
+{
+       CHECK_ERROR_PTR();
+
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+       if (size <= 0 || size > 128)
+               return NULL;
+       size = (size + 7) & ~7;
+#else
+       if (size <= 0 || size > 64)
+               return NULL;
+       size = (size + 3) & ~3;
+#endif
+       return ensure_abuf(compiler, size);
+}
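+
+/* Worked example (illustrative): on a 64-bit target (size + 7) & ~7
+   rounds the request up to the next multiple of 8 (1..8 -> 8,
+   9..16 -> 16); the 32-bit branch rounds to a multiple of 4. */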
+
+static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf = compiler->buf;
+       struct sljit_memory_fragment *prev = NULL;
+       struct sljit_memory_fragment *tmp;
+
+       do {
+               tmp = buf->next;
+               buf->next = prev;
+               prev = buf;
+               buf = tmp;
+       } while (buf != NULL);
+
+       compiler->buf = prev;
+}
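+
+/* Note (illustrative): ensure_buf prepends fragments, so the list ends
+   up newest-first; reverse_buf is the classic three-pointer singly
+   linked list reversal, restoring emission order before the code
+   generator walks the buffers. */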
+
+static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(local_size);
+
+       compiler->options = options;
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+       compiler->fscratches = fscratches;
+       compiler->fsaveds = fsaveds;
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->logical_local_size = local_size;
+#endif
+}
+
+static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(local_size);
+
+       compiler->options = options;
+       compiler->scratches = scratches;
+       compiler->saveds = saveds;
+       compiler->fscratches = fscratches;
+       compiler->fsaveds = fsaveds;
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->logical_local_size = local_size;
+#endif
+}
+
+static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler)
+{
+       label->next = NULL;
+       label->size = compiler->size;
+       if (compiler->last_label)
+               compiler->last_label->next = label;
+       else
+               compiler->labels = label;
+       compiler->last_label = label;
+}
+
+static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_si flags)
+{
+       jump->next = NULL;
+       jump->flags = flags;
+       if (compiler->last_jump)
+               compiler->last_jump->next = jump;
+       else
+               compiler->jumps = jump;
+       compiler->last_jump = jump;
+}
+
+static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler)
+{
+       const_->next = NULL;
+       const_->addr = compiler->size;
+       if (compiler->last_const)
+               compiler->last_const->next = const_;
+       else
+               compiler->consts = const_;
+       compiler->last_const = const_;
+}
+
+#define ADDRESSING_DEPENDS_ON(exp, reg) \
+       (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg))
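+
+/* Encoding reminder (illustrative): a memory operand is built as
+   SLJIT_MEM | base_reg | (offset_reg << 8) plus an immediate word, so
+   e.g. SLJIT_MEM2(SLJIT_R0, SLJIT_R1) depends on both R0 and R1, and
+   the macro above detects either dependency. */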
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+#define FUNCTION_CHECK_OP() \
+       CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
+       switch (GET_OPCODE(op)) { \
+       case SLJIT_NOT: \
+       case SLJIT_CLZ: \
+       case SLJIT_AND: \
+       case SLJIT_OR: \
+       case SLJIT_XOR: \
+       case SLJIT_SHL: \
+       case SLJIT_LSHR: \
+       case SLJIT_ASHR: \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \
+               break; \
+       case SLJIT_NEG: \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
+               break; \
+       case SLJIT_MUL: \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
+               break; \
+       case SLJIT_ADD: \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \
+               break; \
+       case SLJIT_SUB: \
+               break; \
+       case SLJIT_ADDC: \
+       case SLJIT_SUBC: \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \
+               break; \
+       case SLJIT_BREAKPOINT: \
+       case SLJIT_NOP: \
+       case SLJIT_LUMUL: \
+       case SLJIT_LSMUL: \
+       case SLJIT_MOV: \
+       case SLJIT_MOV_UI: \
+       case SLJIT_MOV_P: \
+       case SLJIT_MOVU: \
+       case SLJIT_MOVU_UI: \
+       case SLJIT_MOVU_P: \
+               /* Nothing allowed */ \
+               CHECK_ARGUMENT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+               break; \
+       default: \
+               /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+               break; \
+       }
+
+#define FUNCTION_CHECK_FOP() \
+       CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
+       switch (GET_OPCODE(op)) { \
+       case SLJIT_DCMP: \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+               CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
+               break; \
+       default: \
+               /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \
+               CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+               break; \
+       }
+
+#define FUNCTION_CHECK_IS_REG(r) \
+       (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \
+       ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
+
+#define FUNCTION_CHECK_IS_REG_OR_UNUSED(r) \
+       ((r) == SLJIT_UNUSED || \
+       ((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \
+       ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#define CHECK_NOT_VIRTUAL_REGISTER(p) \
+       CHECK_ARGUMENT((p) < SLJIT_R3 || (p) > SLJIT_R6);
+#else
+#define CHECK_NOT_VIRTUAL_REGISTER(p)
+#endif
+
+#define FUNCTION_CHECK_SRC(p, i) \
+       CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \
+       if (FUNCTION_CHECK_IS_REG(p)) \
+               CHECK_ARGUMENT((i) == 0); \
+       else if ((p) == SLJIT_IMM) \
+               ; \
+       else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
+               CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
+       else { \
+               CHECK_ARGUMENT((p) & SLJIT_MEM); \
+               CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \
+               CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \
+               if ((p) & OFFS_REG_MASK) { \
+                       CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \
+                       CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+                       CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
+                       CHECK_ARGUMENT(!((i) & ~0x3)); \
+               } \
+               CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+       }
+
+#define FUNCTION_CHECK_DST(p, i) \
+       CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \
+       if (FUNCTION_CHECK_IS_REG_OR_UNUSED(p)) \
+               CHECK_ARGUMENT((i) == 0); \
+       else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
+               CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
+       else { \
+               CHECK_ARGUMENT((p) & SLJIT_MEM); \
+               CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \
+               CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \
+               if ((p) & OFFS_REG_MASK) { \
+                       CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \
+                       CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+                       CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
+                       CHECK_ARGUMENT(!((i) & ~0x3)); \
+               } \
+               CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+       }
+
+#define FUNCTION_FCHECK(p, i) \
+       CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1); \
+       if (((p) >= SLJIT_FR0 && (p) < (SLJIT_FR0 + compiler->fscratches)) || \
+                       ((p) > (SLJIT_FS0 - compiler->fsaveds) && (p) <= SLJIT_FS0)) \
+               CHECK_ARGUMENT(i == 0); \
+       else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
+               CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
+       else { \
+               CHECK_ARGUMENT((p) & SLJIT_MEM); \
+               CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \
+               CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \
+               if ((p) & OFFS_REG_MASK) { \
+                       CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \
+                       CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+                       CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
+                       CHECK_ARGUMENT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \
+               } \
+               CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+       }
+
+#define FUNCTION_CHECK_OP1() \
+       if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \
+               CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \
+               CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \
+               if ((src & SLJIT_MEM) && (src & REG_MASK)) \
+                       CHECK_ARGUMENT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \
+       }
+
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose)
+{
+       compiler->verbose = verbose;
+}
+
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#ifdef _WIN64
+#      define SLJIT_PRINT_D    "I64"
+#else
+#      define SLJIT_PRINT_D    "l"
+#endif
+#else
+#      define SLJIT_PRINT_D    ""
+#endif
+
+#define sljit_verbose_reg(compiler, r) \
+       do { \
+               if ((r) < (SLJIT_R0 + compiler->scratches)) \
+                       fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \
+               else \
+                       fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \
+       } while (0)
+
+#define sljit_verbose_param(compiler, p, i) \
+       if ((p) & SLJIT_IMM) \
+               fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \
+       else if ((p) & SLJIT_MEM) { \
+               if ((p) & REG_MASK) { \
+                       fputc('[', compiler->verbose); \
+                       sljit_verbose_reg(compiler, (p) & REG_MASK); \
+                       if ((p) & OFFS_REG_MASK) { \
+                               fprintf(compiler->verbose, " + "); \
+                               sljit_verbose_reg(compiler, OFFS_REG(p)); \
+                               if (i) \
+                                       fprintf(compiler->verbose, " * %d", 1 << (i)); \
+                       } \
+                       else if (i) \
+                               fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \
+                       fputc(']', compiler->verbose); \
+               } \
+               else \
+                       fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
+       } else if (p) \
+               sljit_verbose_reg(compiler, p); \
+       else \
+               fprintf(compiler->verbose, "unused");
+
+#define sljit_verbose_fparam(compiler, p, i) \
+       if ((p) & SLJIT_MEM) { \
+               if ((p) & REG_MASK) { \
+                       fputc('[', compiler->verbose); \
+                       sljit_verbose_reg(compiler, (p) & REG_MASK); \
+                       if ((p) & OFFS_REG_MASK) { \
+                               fprintf(compiler->verbose, " + "); \
+                               sljit_verbose_reg(compiler, OFFS_REG(p)); \
+                               if (i) \
+                                       fprintf(compiler->verbose, " * %d", 1 << (i)); \
+                       } \
+                       else if (i) \
+                               fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \
+                       fputc(']', compiler->verbose); \
+               } \
+               else \
+                       fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
+       } \
+       else { \
+               if ((p) < (SLJIT_FR0 + compiler->fscratches)) \
+                       fprintf(compiler->verbose, "fr%d", (p) - SLJIT_FR0); \
+               else \
+                       fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \
+       }
+
+static SLJIT_CONST char* op0_names[] = {
+       (char*)"breakpoint", (char*)"nop",
+       (char*)"lumul", (char*)"lsmul", (char*)"ludiv", (char*)"lsdiv",
+};
+
+static SLJIT_CONST char* op1_names[] = {
+       (char*)"mov", (char*)"mov_ub", (char*)"mov_sb", (char*)"mov_uh",
+       (char*)"mov_sh", (char*)"mov_ui", (char*)"mov_si", (char*)"mov_p",
+       (char*)"movu", (char*)"movu_ub", (char*)"movu_sb", (char*)"movu_uh",
+       (char*)"movu_sh", (char*)"movu_ui", (char*)"movu_si", (char*)"movu_p",
+       (char*)"not", (char*)"neg", (char*)"clz",
+};
+
+static SLJIT_CONST char* op2_names[] = {
+       (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc",
+       (char*)"mul", (char*)"and", (char*)"or", (char*)"xor",
+       (char*)"shl", (char*)"lshr", (char*)"ashr",
+};
+
+static SLJIT_CONST char* fop1_names[] = {
+       (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv",
+       (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg",
+       (char*)"abs",
+};
+
+static SLJIT_CONST char* fop2_names[] = {
+       (char*)"add", (char*)"sub", (char*)"mul", (char*)"div"
+};
+
+#define JUMP_PREFIX(type) \
+       ((type & 0xff) <= SLJIT_MUL_NOT_OVERFLOW ? ((type & SLJIT_INT_OP) ? "i_" : "") \
+       : ((type & 0xff) <= SLJIT_D_ORDERED ? ((type & SLJIT_SINGLE_OP) ? "s_" : "d_") : ""))
+
+static char* jump_names[] = {
+       (char*)"equal", (char*)"not_equal",
+       (char*)"less", (char*)"greater_equal",
+       (char*)"greater", (char*)"less_equal",
+       (char*)"sig_less", (char*)"sig_greater_equal",
+       (char*)"sig_greater", (char*)"sig_less_equal",
+       (char*)"overflow", (char*)"not_overflow",
+       (char*)"mul_overflow", (char*)"mul_not_overflow",
+       (char*)"equal", (char*)"not_equal",
+       (char*)"less", (char*)"greater_equal",
+       (char*)"greater", (char*)"less_equal",
+       (char*)"unordered", (char*)"ordered",
+       (char*)"jump", (char*)"fast_call",
+       (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3"
+};
+
+#endif /* SLJIT_VERBOSE */
+
+/* --------------------------------------------------------------------- */
+/*  Arch dependent                                                       */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+       || (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       struct sljit_jump *jump;
+#endif
+
+       SLJIT_UNUSED_ARG(compiler);
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(compiler->size > 0);
+       jump = compiler->jumps;
+       while (jump) {
+               /* All jumps must have a target. */
+               CHECK_ARGUMENT(jump->flags & (JUMP_LABEL | JUMP_ADDR));
+               jump = jump->next;
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT));
+       CHECK_ARGUMENT(args >= 0 && args <= 3);
+       CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
+       CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
+       CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
+       CHECK_ARGUMENT(args <= saveds);
+       CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+       CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+       CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+       CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "  enter options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
+                       args, scratches, saveds, fscratches, fsaveds, local_size);
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT));
+       CHECK_ARGUMENT(args >= 0 && args <= 3);
+       CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
+       CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
+       CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
+       CHECK_ARGUMENT(args <= saveds);
+       CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+       CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+       CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+       CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "  set_context options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
+                       args, scratches, saveds, fscratches, fsaveds, local_size);
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(compiler->scratches >= 0);
+       if (op != SLJIT_UNUSED) {
+               CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_P);
+               FUNCTION_CHECK_SRC(src, srcw);
+       }
+       else
+               CHECK_ARGUMENT(src == 0 && srcw == 0);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               if (op == SLJIT_UNUSED)
+                       fprintf(compiler->verbose, "  return\n");
+               else {
+                       fprintf(compiler->verbose, "  return.%s ", op1_names[op - SLJIT_OP1_BASE]);
+                       sljit_verbose_param(compiler, src, srcw);
+                       fprintf(compiler->verbose, "\n");
+               }
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  fast_enter ");
+               sljit_verbose_param(compiler, dst, dstw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       FUNCTION_CHECK_SRC(src, srcw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  fast_return ");
+               sljit_verbose_param(compiler, src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LSMUL)
+               || ((op & ~SLJIT_INT_OP) >= SLJIT_LUDIV && (op & ~SLJIT_INT_OP) <= SLJIT_LSDIV));
+       CHECK_ARGUMENT(op < SLJIT_LUMUL || compiler->scratches >= 2);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "  %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]);
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ);
+       FUNCTION_CHECK_OP();
+       FUNCTION_CHECK_SRC(src, srcw);
+       FUNCTION_CHECK_DST(dst, dstw);
+       FUNCTION_CHECK_OP1();
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE],
+                       !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
+                       !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+               sljit_verbose_param(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(compiler, src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR);
+       FUNCTION_CHECK_OP();
+       FUNCTION_CHECK_SRC(src1, src1w);
+       FUNCTION_CHECK_SRC(src2, src2w);
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE],
+                       !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
+                       !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+               sljit_verbose_param(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(compiler, src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(compiler, src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_si reg)
+{
+       SLJIT_UNUSED_ARG(reg);
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS);
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_si reg)
+{
+       SLJIT_UNUSED_ARG(reg);
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       int i;
+#endif
+
+       SLJIT_UNUSED_ARG(compiler);
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(instruction);
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+       CHECK_ARGUMENT(size > 0 && size < 16);
+#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+       CHECK_ARGUMENT((size == 2 && (((sljit_sw)instruction) & 0x1) == 0)
+               || (size == 4 && (((sljit_sw)instruction) & 0x3) == 0));
+#else
+       CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0);
+#endif
+
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  op_custom");
+               for (i = 0; i < size; i++)
+                       fprintf(compiler->verbose, " 0x%x", ((sljit_ub*)instruction)[i]);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(sljit_is_fpu_available());
+       CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DMOV && GET_OPCODE(op) <= SLJIT_DABS);
+       FUNCTION_CHECK_FOP();
+       FUNCTION_FCHECK(src, srcw);
+       FUNCTION_FCHECK(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+                       fprintf(compiler->verbose, "  %s%s ", fop1_names[SLJIT_CONVD_FROMS - SLJIT_FOP1_BASE],
+                               (op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms");
+               else
+                       fprintf(compiler->verbose, "  %s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d",
+                               fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE]);
+
+               sljit_verbose_fparam(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(compiler, src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(sljit_is_fpu_available());
+       CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_DCMP);
+       FUNCTION_CHECK_FOP();
+       FUNCTION_FCHECK(src1, src1w);
+       FUNCTION_FCHECK(src2, src2w);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  %s%s%s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop1_names[SLJIT_DCMP - SLJIT_FOP1_BASE],
+                       (op & SLJIT_SET_E) ? ".e" : "", (op & SLJIT_SET_S) ? ".s" : "");
+               sljit_verbose_fparam(compiler, src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(compiler, src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(sljit_is_fpu_available());
+       CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CONVI_FROMD);
+       FUNCTION_CHECK_FOP();
+       FUNCTION_FCHECK(src, srcw);
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE],
+                       (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w",
+                       (op & SLJIT_SINGLE_OP) ? "s" : "d");
+               sljit_verbose_param(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(compiler, src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(sljit_is_fpu_available());
+       CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVD_FROMW && GET_OPCODE(op) <= SLJIT_CONVD_FROMI);
+       FUNCTION_CHECK_FOP();
+       FUNCTION_CHECK_SRC(src, srcw);
+       FUNCTION_FCHECK(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE],
+                       (op & SLJIT_SINGLE_OP) ? "s" : "d",
+                       (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w");
+               sljit_verbose_fparam(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(compiler, src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(sljit_is_fpu_available());
+       CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DADD && GET_OPCODE(op) <= SLJIT_DDIV);
+       FUNCTION_CHECK_FOP();
+       FUNCTION_FCHECK(src1, src1w);
+       FUNCTION_FCHECK(src2, src2w);
+       FUNCTION_FCHECK(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  %s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE]);
+               sljit_verbose_fparam(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(compiler, src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(compiler, src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compiler *compiler)
+{
+       SLJIT_UNUSED_ARG(compiler);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "label:\n");
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP)));
+       CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3);
+       CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_INT_OP));
+       CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose))
+               fprintf(compiler->verbose, "  jump%s.%s%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r",
+                       JUMP_PREFIX(type), jump_names[type & 0xff]);
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP)));
+       CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL);
+       FUNCTION_CHECK_SRC(src1, src1w);
+       FUNCTION_CHECK_SRC(src2, src2w);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  cmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r",
+                       (type & SLJIT_INT_OP) ? "i_" : "", jump_names[type & 0xff]);
+               sljit_verbose_param(compiler, src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_param(compiler, src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(sljit_is_fpu_available());
+       CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_SINGLE_OP)));
+       CHECK_ARGUMENT((type & 0xff) >= SLJIT_D_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
+       FUNCTION_FCHECK(src1, src1w);
+       FUNCTION_FCHECK(src2, src2w);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  fcmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r",
+                       (type & SLJIT_SINGLE_OP) ? "s_" : "d_", jump_names[type & 0xff]);
+               sljit_verbose_fparam(compiler, src1, src1w);
+               fprintf(compiler->verbose, ", ");
+               sljit_verbose_fparam(compiler, src2, src2w);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+               compiler->skip_checks = 0;
+               CHECK_RETURN_OK;
+       }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
+       CHECK_ARGUMENT(type <= SLJIT_CALL0 || (type - SLJIT_CALL0) <= compiler->scratches);
+       FUNCTION_CHECK_SRC(src, srcw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  ijump.%s ", jump_names[type]);
+               sljit_verbose_param(compiler, src, srcw);
+               fprintf(compiler->verbose, "\n");
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
+       CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
+       CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_UI || GET_OPCODE(op) == SLJIT_MOV_SI
+               || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR));
+       CHECK_ARGUMENT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0);
+       CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS));
+       if (GET_OPCODE(op) < SLJIT_ADD) {
+               CHECK_ARGUMENT(src == SLJIT_UNUSED && srcw == 0);
+       } else {
+               CHECK_ARGUMENT(src == dst && srcw == dstw);
+       }
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  flags.%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i",
+                       GET_OPCODE(op) >= SLJIT_OP2_BASE ? op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE] : op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE],
+                       !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+               sljit_verbose_param(compiler, dst, dstw);
+               if (src != SLJIT_UNUSED) {
+                       fprintf(compiler->verbose, ", ");
+                       sljit_verbose_param(compiler, src, srcw);
+               }
+               fprintf(compiler->verbose, ", %s%s\n", JUMP_PREFIX(type), jump_names[type & 0xff]);
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  local_base ");
+               sljit_verbose_param(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset);
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+               fprintf(compiler->verbose, "  const ");
+               sljit_verbose_param(compiler, dst, dstw);
+               fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value);
+       }
+#endif
+       CHECK_RETURN_OK;
+}
+
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */
+
+#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \
+       SLJIT_COMPILE_ASSERT(!(SLJIT_CONVW_FROMD & 0x1) && !(SLJIT_CONVD_FROMW & 0x1), \
+               invalid_float_opcodes); \
+       if (GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_DCMP) { \
+               if (GET_OPCODE(op) == SLJIT_DCMP) { \
+                       CHECK(check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw)); \
+                       ADJUST_LOCAL_OFFSET(dst, dstw); \
+                       ADJUST_LOCAL_OFFSET(src, srcw); \
+                       return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \
+               } \
+               if ((GET_OPCODE(op) | 0x1) == SLJIT_CONVI_FROMD) { \
+                       CHECK(check_sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw)); \
+                       ADJUST_LOCAL_OFFSET(dst, dstw); \
+                       ADJUST_LOCAL_OFFSET(src, srcw); \
+                       return sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \
+               } \
+               CHECK(check_sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw)); \
+               ADJUST_LOCAL_OFFSET(dst, dstw); \
+               ADJUST_LOCAL_OFFSET(src, srcw); \
+               return sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \
+       } \
+       CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \
+       ADJUST_LOCAL_OFFSET(dst, dstw); \
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       /* Return early if there is nothing to do. */
+       if (op == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+       /* At the moment the pointer size is always equal to that of sljit_sw;
+          this may change in the future. */
+       if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P))
+               return SLJIT_SUCCESS;
+#else
+       if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P))
+               return SLJIT_SUCCESS;
+#endif
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+               || (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw);
+}
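+
+/* Note (illustrative): the early returns above skip a mov only when it
+   would be a true no-op; on 64-bit targets SLJIT_MOV_UI / SLJIT_MOV_SI
+   are never skipped because they must still zero- or sign-extend the
+   upper 32 bits of the return register. */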
+
+/* CPU description section */
+
+#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
+#define SLJIT_CPUINFO_PART1 " 32bit ("
+#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#define SLJIT_CPUINFO_PART1 " 64bit ("
+#else
+#error "Internal error: CPU type info missing"
+#endif
+
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define SLJIT_CPUINFO_PART2 "little endian + "
+#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)
+#define SLJIT_CPUINFO_PART2 "big endian + "
+#else
+#error "Internal error: CPU type info missing"
+#endif
+
+#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED)
+#define SLJIT_CPUINFO_PART3 "unaligned)"
+#else
+#define SLJIT_CPUINFO_PART3 "aligned)"
+#endif
+
+#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3
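+
+/* Example (illustrative): on a typical x86-64 build the three literals
+   paste to " 64bit (little endian + unaligned)". */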
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#      include "sljitNativeX86_common.c"
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+#      include "sljitNativeARM_32.c"
+#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#      include "sljitNativeARM_32.c"
+#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#      include "sljitNativeARM_T2_32.c"
+#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#      include "sljitNativeARM_64.c"
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+#      include "sljitNativePPC_common.c"
+#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+#      include "sljitNativeMIPS_common.c"
+#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
+#      include "sljitNativeSPARC_common.c"
+#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+#      include "sljitNativeTILEGX_64.c"
+#endif
+
+#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* Default compare for most architectures. */
+       sljit_si flags, tmp_src, condition;
+       sljit_sw tmp_srcw;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
+
+       condition = type & 0xff;
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+       if (condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL) {
+               if ((src1 & SLJIT_IMM) && !src1w) {
+                       src1 = src2;
+                       src1w = src2w;
+                       src2 = SLJIT_IMM;
+                       src2w = 0;
+               }
+               if ((src2 & SLJIT_IMM) && !src2w)
+                       return emit_cmp_to0(compiler, type, src1, src1w);
+       }
+#endif
+
+       if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) {
+               /* An immediate is preferred as the second argument by most architectures. */
+               switch (condition) {
+               case SLJIT_LESS:
+                       condition = SLJIT_GREATER;
+                       break;
+               case SLJIT_GREATER_EQUAL:
+                       condition = SLJIT_LESS_EQUAL;
+                       break;
+               case SLJIT_GREATER:
+                       condition = SLJIT_LESS;
+                       break;
+               case SLJIT_LESS_EQUAL:
+                       condition = SLJIT_GREATER_EQUAL;
+                       break;
+               case SLJIT_SIG_LESS:
+                       condition = SLJIT_SIG_GREATER;
+                       break;
+               case SLJIT_SIG_GREATER_EQUAL:
+                       condition = SLJIT_SIG_LESS_EQUAL;
+                       break;
+               case SLJIT_SIG_GREATER:
+                       condition = SLJIT_SIG_LESS;
+                       break;
+               case SLJIT_SIG_LESS_EQUAL:
+                       condition = SLJIT_SIG_GREATER_EQUAL;
+                       break;
+               }
+               type = condition | (type & (SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP));
+               tmp_src = src1;
+               src1 = src2;
+               src2 = tmp_src;
+               tmp_srcw = src1w;
+               src1w = src2w;
+               src2w = tmp_srcw;
+       }
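+
+       /* Illustrative example: "imm 3 < r0" is not encodable on most
+          targets, so the operands are swapped and the condition mirrored
+          to "r0 > 3"; (in)equality needs no mirroring and is untouched. */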
+
+       if (condition <= SLJIT_NOT_ZERO)
+               flags = SLJIT_SET_E;
+       else if (condition <= SLJIT_LESS_EQUAL)
+               flags = SLJIT_SET_U;
+       else
+               flags = SLJIT_SET_S;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_INT_OP),
+               SLJIT_UNUSED, 0, src1, src1w, src2, src2w));
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+}
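+
+/* Lowering sketch (illustrative): sljit_emit_cmp(c, SLJIT_LESS,
+   SLJIT_R0, 0, SLJIT_IMM, 10) becomes a flag-setting subtraction
+   (SLJIT_SUB | SLJIT_SET_U with the result discarded) followed by a
+   conditional jump on the unsigned-less condition. */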
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si flags, condition;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w));
+
+       condition = type & 0xff;
+       flags = (condition <= SLJIT_D_NOT_EQUAL) ? SLJIT_SET_E : SLJIT_SET_S;
+       if (type & SLJIT_SINGLE_OP)
+               flags |= SLJIT_SINGLE_OP;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       sljit_emit_fop1(compiler, SLJIT_DCMP | flags, src1, src1w, src2, src2w);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+}
+
+#endif
+
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
+
+       ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       if (offset != 0)
+               return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+       return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0);
+}
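+
+/* Note (illustrative): the ADD above carries SLJIT_KEEP_FLAGS so taking
+   a local's address does not clobber condition flags set by an earlier
+   comparison; a zero offset degenerates to a plain register move. */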
+
+#endif
+
+#else /* SLJIT_CONFIG_UNSUPPORTED */
+
+/* Empty function bodies for machines that are not (yet) supported. */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "unsupported";
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data)
+{
+       SLJIT_UNUSED_ARG(allocator_data);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(size);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(verbose);
+       SLJIT_ASSERT_STOP();
+}
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
+{
+       SLJIT_UNUSED_ARG(code);
+       SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(options);
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(scratches);
+       SLJIT_UNUSED_ARG(saveds);
+       SLJIT_UNUSED_ARG(fscratches);
+       SLJIT_UNUSED_ARG(fsaveds);
+       SLJIT_UNUSED_ARG(local_size);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(options);
+       SLJIT_UNUSED_ARG(args);
+       SLJIT_UNUSED_ARG(scratches);
+       SLJIT_UNUSED_ARG(saveds);
+       SLJIT_UNUSED_ARG(fscratches);
+       SLJIT_UNUSED_ARG(fsaveds);
+       SLJIT_UNUSED_ARG(local_size);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       SLJIT_ASSERT_STOP();
+       return reg;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(instruction);
+       SLJIT_UNUSED_ARG(size);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+       SLJIT_ASSERT_STOP();
+       return 0;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src1);
+       SLJIT_UNUSED_ARG(src1w);
+       SLJIT_UNUSED_ARG(src2);
+       SLJIT_UNUSED_ARG(src2w);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label)
+{
+       SLJIT_UNUSED_ARG(jump);
+       SLJIT_UNUSED_ARG(label);
+       SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target)
+{
+       SLJIT_UNUSED_ARG(jump);
+       SLJIT_UNUSED_ARG(target);
+       SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(op);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(src);
+       SLJIT_UNUSED_ARG(srcw);
+       SLJIT_UNUSED_ARG(type);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(offset);
+       SLJIT_ASSERT_STOP();
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw initval)
+{
+       SLJIT_UNUSED_ARG(compiler);
+       SLJIT_UNUSED_ARG(dst);
+       SLJIT_UNUSED_ARG(dstw);
+       SLJIT_UNUSED_ARG(initval);
+       SLJIT_ASSERT_STOP();
+       return NULL;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       SLJIT_UNUSED_ARG(addr);
+       SLJIT_UNUSED_ARG(new_addr);
+       SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       SLJIT_UNUSED_ARG(addr);
+       SLJIT_UNUSED_ARG(new_constant);
+       SLJIT_ASSERT_STOP();
+}
+
+#endif
diff --git a/ext/pcre/pcrelib/sljit/sljitLir.h b/ext/pcre/pcrelib/sljit/sljitLir.h
new file mode 100644 (file)
index 0000000..24c0f60
--- /dev/null
@@ -0,0 +1,1199 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_LIR_H_
+#define _SLJIT_LIR_H_
+
+/*
+   ------------------------------------------------------------------------
+    Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC)
+   ------------------------------------------------------------------------
+
+   Short description
+    Advantages:
+      - The execution can be continued from any LIR instruction. In other
+        words, it is possible to jump to any label from anywhere, even from
+        a code fragment which is compiled later, as long as both compiled
+        code fragments share the same context. See sljit_emit_enter for more details
+      - Supports self modifying code: target of (conditional) jump and call
+        instructions and some constant values can be dynamically modified
+        during runtime
+        - although it is not suggested to do it frequently
+        - can be used for inline caching: save an important value once
+          in the instruction stream
+        - since this feature limits the optimization possibilities, a
+          special flag must be passed at compile time when these
+          instructions are emitted
+      - A fixed stack space can be allocated for local variables
+      - The compiler is thread-safe
+      - The compiler is highly configurable through preprocessor macros.
+        You can disable unneeded features (multithreading in single
+        threaded applications), and you can use your own system functions
+        (including memory allocators). See sljitConfig.h
+    Disadvantages:
+      - No automatic register allocation, and temporary results are
+        not stored on the stack (hence the "stack-less" name)
+    In practice:
+      - This approach is very effective for interpreters
+        - One of the saved registers typically points to a stack interface
+        - It can jump to any exception handler anytime (even if it belongs
+          to another function)
+        - Hot paths can be modified during runtime reflecting the changes
+          of the fastest execution path of the dynamic language
+        - SLJIT supports complex memory addressing modes
+        - mainly position and context independent code (except some cases)
+
+    For valgrind users:
+      - pass the --smc-check=all argument to valgrind, since JIT output is "self-modifying code"
+*/
+
+#if !(defined SLJIT_NO_DEFAULT_CONFIG && SLJIT_NO_DEFAULT_CONFIG)
+#include "sljitConfig.h"
+#endif
+
+/* The following header file defines useful macros for fine tuning
+sljit based code generators. They are listed in the beginning
+of sljitConfigInternal.h */
+
+#include "sljitConfigInternal.h"
+
+/* --------------------------------------------------------------------- */
+/*  Error codes                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Indicates no error. */
+#define SLJIT_SUCCESS                  0
+/* After the call of sljit_generate_code(), the error code of the compiler
+   is set to this value to avoid future sljit calls (in debug mode at least).
+   The compiler should be freed after sljit_generate_code(). */
+#define SLJIT_ERR_COMPILED             1
+/* Cannot allocate non executable memory. */
+#define SLJIT_ERR_ALLOC_FAILED         2
+/* Cannot allocate executable memory.
+   Only for sljit_generate_code() */
+#define SLJIT_ERR_EX_ALLOC_FAILED      3
+/* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */
+#define SLJIT_ERR_UNSUPPORTED          4
+/* An invalid argument was passed to an SLJIT function. */
+#define SLJIT_ERR_BAD_ARGUMENT         5
+
+/* --------------------------------------------------------------------- */
+/*  Registers                                                            */
+/* --------------------------------------------------------------------- */
+
+/*
+  Scratch (R) registers: registers that may not preserve their values
+  across function calls.
+
+  Saved (S) registers: registers that preserve their values across
+  function calls.
+
+  The scratch and saved register sets overlap. The last scratch register
+  is the first saved register, the one before the last is the second saved
+  register, and so on.
+
+  If an architecture provides two scratch and three saved registers,
+  its scratch and saved register sets are the following:
+
+     R0   |  [S4]  |   R0 and S4 represent the same physical register
+     R1   |  [S3]  |   R1 and S3 represent the same physical register
+    [R2]  |   S2   |   R2 and S2 represent the same physical register
+    [R3]  |   S1   |   R3 and S1 represent the same physical register
+    [R4]  |   S0   |   R4 and S0 represent the same physical register
+
+  Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and
+        SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture.
+
+  Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10
+        and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers
+        are virtual on x86-32. See below.
+
+  The purpose of this definition is convenience. Although a register
+  is either a scratch register or a saved register, SLJIT allows accessing
+  them from the other set. For example, four registers can be used as
+  scratch registers and the fifth one as a saved register on the architecture
+  above. Of course the last two of these four scratch registers (R2 and R3)
+  will be saved on the stack, because they are defined as saved
+  registers in the application binary interface. Still, R2 and R3 can be
+  used to refer to these registers instead of S2 and S1, which
+  makes it easier to write platform-independent code. Scratch registers
+  can be saved registers in a similar way, but these extra saved
+  registers will not be preserved across function calls! Hence the
+  application must save them on those platforms where the number of
+  saved registers is too low. This can be done by copying them onto
+  the stack and restoring them after a function call.
+
+  Note: To emphasize that registers assigned to R2-R4 are saved
+        registers, they are enclosed by square brackets. S3-S4
+        are marked in a similar way.
+
+  Note: sljit_emit_enter and sljit_set_context define whether a register
+        is an S or R register. E.g: when 3 scratches and 1 saved are mapped
+        by sljit_emit_enter, the allowed register set will be: R0-R2 and
+        S0. Although S2 is mapped to the same position as R2, it is not
+        available in the current configuration. Furthermore the R3 (S1)
+        register is not available either.
+*/
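+
+/* For example (a minimal sketch; argument and float register counts are
+   zero for simplicity), on the five-register architecture above a function
+   can map four scratch and one saved register:
+
+     sljit_emit_enter(compiler, 0, 0, 4, 1, 0, 0, 0);
+
+   Afterwards R0-R3 and S0 are usable, and R2/R3 are automatically
+   preserved on the stack as described above. */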
+
+/* When SLJIT_UNUSED is specified as destination, the result is discarded. */
+#define SLJIT_UNUSED           0
+
+/* Scratch registers. */
+#define SLJIT_R0       1
+#define SLJIT_R1       2
+#define SLJIT_R2       3
+/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they
+   are allocated on the stack). These registers are called virtual
+   and cannot be used for memory addressing (cannot be part of
+   any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such
+   limitation on other CPUs. See sljit_get_register_index(). */
+#define SLJIT_R3       4
+#define SLJIT_R4       5
+#define SLJIT_R5       6
+#define SLJIT_R6       7
+#define SLJIT_R7       8
+#define SLJIT_R8       9
+#define SLJIT_R9       10
+/* All R registers provided by the architecture can be accessed by SLJIT_R(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */
+#define SLJIT_R(i)     (1 + (i))
+
+/* Saved registers. */
+#define SLJIT_S0       (SLJIT_NUMBER_OF_REGISTERS)
+#define SLJIT_S1       (SLJIT_NUMBER_OF_REGISTERS - 1)
+#define SLJIT_S2       (SLJIT_NUMBER_OF_REGISTERS - 2)
+/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they
+   are allocated on the stack). These registers are called virtual
+   and cannot be used for memory addressing (cannot be part of
+   any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such
+   limitation on other CPUs. See sljit_get_register_index(). */
+#define SLJIT_S3       (SLJIT_NUMBER_OF_REGISTERS - 3)
+#define SLJIT_S4       (SLJIT_NUMBER_OF_REGISTERS - 4)
+#define SLJIT_S5       (SLJIT_NUMBER_OF_REGISTERS - 5)
+#define SLJIT_S6       (SLJIT_NUMBER_OF_REGISTERS - 6)
+#define SLJIT_S7       (SLJIT_NUMBER_OF_REGISTERS - 7)
+#define SLJIT_S8       (SLJIT_NUMBER_OF_REGISTERS - 8)
+#define SLJIT_S9       (SLJIT_NUMBER_OF_REGISTERS - 9)
+/* All S registers provided by the architecture can be accessed by SLJIT_S(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */
+#define SLJIT_S(i)     (SLJIT_NUMBER_OF_REGISTERS - (i))
+
+/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */
+#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1)
+
+/* The SLJIT_SP provides direct access to the linear stack space allocated by
+   sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP).
+   The immediate offset is extended by the relative stack offset automatically.
+   The sljit_get_local_base can be used to obtain the absolute address. */
+#define SLJIT_SP       (SLJIT_NUMBER_OF_REGISTERS + 1)
+
+/* Return with machine word. */
+
+#define SLJIT_RETURN_REG       SLJIT_R0
+
+/* x86 prefers specific registers for special purposes. In case of a shift
+   by register it supports only SLJIT_R2 as the shift argument
+   (which is the src2 argument of sljit_emit_op2). If another register is
+   used, sljit must exchange data between registers, which causes a minor
+   slowdown. Other architectures have no such limitation. */
+
+#define SLJIT_PREF_SHIFT_REG   SLJIT_R2
+
+/* --------------------------------------------------------------------- */
+/*  Floating point registers                                             */
+/* --------------------------------------------------------------------- */
+
+/* Each floating point register can store a double or single precision
+   value. The FR and FS register sets overlap in the same way as the R
+   and S register sets. See above. */
+
+/* Note: SLJIT_UNUSED as destination is not valid for floating point
+   operations, since they cannot be used for setting flags. */
+
+/* Floating point scratch registers. */
+#define SLJIT_FR0      1
+#define SLJIT_FR1      2
+#define SLJIT_FR2      3
+#define SLJIT_FR3      4
+#define SLJIT_FR4      5
+#define SLJIT_FR5      6
+/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */
+#define SLJIT_FR(i)    (1 + (i))
+
+/* Floating point saved registers. */
+#define SLJIT_FS0      (SLJIT_NUMBER_OF_FLOAT_REGISTERS)
+#define SLJIT_FS1      (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1)
+#define SLJIT_FS2      (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2)
+#define SLJIT_FS3      (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3)
+#define SLJIT_FS4      (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4)
+#define SLJIT_FS5      (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5)
+/* All FS registers provided by the architecture can be accessed by SLJIT_FS(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */
+#define SLJIT_FS(i)    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i))
+
+/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */
+#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1)
+
+/* --------------------------------------------------------------------- */
+/*  Main structures and functions                                        */
+/* --------------------------------------------------------------------- */
+
+/*
+       The following structures are private, and can be changed in the
+       future. Keeping them here allows code inlining.
+*/
+
+struct sljit_memory_fragment {
+       struct sljit_memory_fragment *next;
+       sljit_uw used_size;
+       /* Must be aligned to sljit_sw. */
+       sljit_ub memory[1];
+};
+
+struct sljit_label {
+       struct sljit_label *next;
+       sljit_uw addr;
+       /* The maximum size difference. */
+       sljit_uw size;
+};
+
+struct sljit_jump {
+       struct sljit_jump *next;
+       sljit_uw addr;
+       sljit_sw flags;
+       union {
+               sljit_uw target;
+               struct sljit_label* label;
+       } u;
+};
+
+struct sljit_const {
+       struct sljit_const *next;
+       sljit_uw addr;
+};
+
+struct sljit_compiler {
+       sljit_si error;
+       sljit_si options;
+
+       struct sljit_label *labels;
+       struct sljit_jump *jumps;
+       struct sljit_const *consts;
+       struct sljit_label *last_label;
+       struct sljit_jump *last_jump;
+       struct sljit_const *last_const;
+
+       void *allocator_data;
+       struct sljit_memory_fragment *buf;
+       struct sljit_memory_fragment *abuf;
+
+       /* Used scratch registers. */
+       sljit_si scratches;
+       /* Used saved registers. */
+       sljit_si saveds;
+       /* Used float scratch registers. */
+       sljit_si fscratches;
+       /* Used float saved registers. */
+       sljit_si fsaveds;
+       /* Local stack size. */
+       sljit_si local_size;
+       /* Code size. */
+       sljit_uw size;
+       /* For statistical purposes. */
+       sljit_uw executable_size;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       sljit_si args;
+#endif
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si mode32;
+#endif
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+       sljit_si flags_saved;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       /* Constant pool handling. */
+       sljit_uw *cpool;
+       sljit_ub *cpool_unique;
+       sljit_uw cpool_diff;
+       sljit_uw cpool_fill;
+       /* Other members. */
+       /* Contains pointer, "ldr pc, [...]" pairs. */
+       sljit_uw patches;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+       /* Temporary fields. */
+       sljit_uw shift_imm;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+       sljit_sw imm;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+       sljit_si delay_slot;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       sljit_si delay_slot;
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
+       sljit_si cache_arg;
+       sljit_sw cache_argw;
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       FILE* verbose;
+#endif
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+               || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+       /* Local size passed to the functions. */
+       sljit_si logical_local_size;
+#endif
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+               || (defined SLJIT_DEBUG && SLJIT_DEBUG) \
+               || (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+       sljit_si skip_checks;
+#endif
+};
+
+/* --------------------------------------------------------------------- */
+/*  Main functions                                                       */
+/* --------------------------------------------------------------------- */
+
+/* Creates an sljit compiler. The allocator_data is required by some
+   custom memory managers. This pointer is passed to SLJIT_MALLOC
+   and SLJIT_FREE macros. Most allocators (including the default
+   one) ignore this value, and it is recommended to pass NULL
+   as a dummy value for allocator_data.
+
+   Returns NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data);
+
+/* Frees everything except the compiled machine code. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler);
+
+/* Returns the current error code. If an error has occurred, future sljit
+   calls which use the same compiler argument return early with the same
+   error code. Thus there is no need to check the error after every
+   call; it is enough to do so before the code is compiled. Removing
+   these checks increases the performance of the compiling process. */
+static SLJIT_INLINE sljit_si sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; }
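+
+/* A minimal sketch of this pattern: emit freely, then check the status once
+   before generating code (the ellipsis stands for arbitrary emit calls):
+
+     struct sljit_compiler *c = sljit_create_compiler(NULL);
+     ...
+     if (sljit_get_compiler_error(c) != SLJIT_SUCCESS) {
+         sljit_free_compiler(c);
+         return NULL;
+     }
+     void *code = sljit_generate_code(c);
+*/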
+
+/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED unless
+   an error was detected before. After the error code is set
+   the compiler behaves as if the allocation failure happened
+   during an sljit function call. This can greatly simplify error
+   checking, since only the compiler status needs to be checked
+   after the compilation. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler);
+
+/*
+   Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit,
+   and <= 128 bytes on 64 bit architectures. The memory area is owned by the
+   compiler, and freed by sljit_free_compiler. The returned pointer is
+   sizeof(sljit_sw) aligned. Excellent for allocating small blocks during
+   compilation, with no need to worry about freeing them. The size is
+   enough to contain at most 16 pointers. If the size is outside this range,
+   the function returns NULL. However, this return value does not
+   indicate that there is no more memory (does not set the current error code
+   of the compiler to out-of-memory status).
+*/
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+/* Passing NULL disables verbose. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose);
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code);
+
+/*
+   After the machine code generation is finished we can retrieve the allocated
+   executable memory size, although this area may not be fully filled with
+   instructions depending on some optimizations. This function is useful only
+   for statistical purposes.
+
+   Before a successful code generation, this function returns 0.
+*/
+static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }
+
+/* Instruction generation. These functions return an error code; if there is
+   no error, they return SLJIT_SUCCESS. */
+
+/*
+   The executable code is a function call from the viewpoint of the C
+   language. The function calls must obey the ABI (Application
+   Binary Interface) of the platform, which specifies the purpose of
+   all machine registers and stack handling among other things. The
+   sljit_emit_enter function emits the necessary instructions for
+   setting up a new context for the executable code and moves function
+   arguments to the saved registers. Furthermore the options argument
+   can be used to pass configuration options to the compiler. The
+   available options are listed before sljit_emit_enter.
+
+   The number of sljit_sw arguments passed to the generated function
+   is specified in the "args" parameter. The number of arguments must
+   be less than or equal to 3. The first argument goes to SLJIT_S0,
+   the second goes to SLJIT_S1 and so on. The register set used by
+   the function must be declared as well. The number of scratch and
+   saved registers used by the function must be passed to sljit_emit_enter.
+   Only the R registers from R0 up to the "scratches" argument can be used
+   later. E.g. if "scratches" is set to 2, the register set will be
+   limited to R0 and R1. The S registers and the floating point
+   registers ("fscratches" and "fsaveds") are specified in a similar
+   way. The sljit_emit_enter is also capable of allocating a stack
+   space for local variables. The "local_size" argument contains the
+   size in bytes of this local area, and its starting address is stored
+   in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and
+   SLJIT_SP + local_size (exclusive) can be modified freely until
+   the function returns. The stack space is not initialized.
+
+   Note: the following conditions must be met:
+         0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS
+         0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS
+         scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS
+         0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+         0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+         fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+
+   Note: every call of sljit_emit_enter and sljit_set_context
+         overwrites the previous context.
+*/
+
+/* The absolute address returned by sljit_get_local_base with
+offset 0 is aligned to sljit_d. Otherwise it is aligned to sljit_uw. */
+#define SLJIT_DOUBLE_ALIGNMENT 0x00000001
+
+/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
+#define SLJIT_MAX_LOCAL_SIZE   65536
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size);
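+
+/* A minimal end-to-end sketch (assuming a supported architecture, and using
+   functions and opcodes declared elsewhere in this header): generate the
+   equivalent of  sljit_sw f(sljit_sw a, sljit_sw b) { return a + b; }
+
+     struct sljit_compiler *c = sljit_create_compiler(NULL);
+     sljit_sw (*func)(sljit_sw, sljit_sw);
+
+     sljit_emit_enter(c, 0, 2, 1, 2, 0, 0, 0);    2 args -> S0/S1, 1 scratch
+     sljit_emit_op2(c, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_S1, 0);
+     sljit_emit_return(c, SLJIT_MOV, SLJIT_R0, 0);
+
+     func = (sljit_sw (*)(sljit_sw, sljit_sw))sljit_generate_code(c);
+     sljit_free_compiler(c);    the generated code itself remains valid
+*/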
+
+/* The machine code has a context (which contains the local stack space size,
+   number of used registers, etc.) which is initialized by sljit_emit_enter. Several
+   functions (like sljit_emit_return) require this context to be able to generate
+   the appropriate code. However, some code fragments (like inline caches) may have
+   no normal entry point, so their context is unknown to the compiler. Their context
+   can be provided to the compiler by the sljit_set_context function.
+
+   Note: every call of sljit_emit_enter and sljit_set_context overwrites
+         the previous context. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size);
+
+/* Return from machine code. The op argument can be SLJIT_UNUSED, which means the
+   function does not return anything, or any opcode between SLJIT_MOV and
+   SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw, they must be 0 if op
+   is SLJIT_UNUSED; otherwise see the description of the source and
+   destination arguments below. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src, sljit_sw srcw);
+
+/* Fast calling mechanism for utility functions (see SLJIT_FAST_CALL). All registers and
+   even the stack frame are passed to the callee. The return address is preserved in
+   dst/dstw by sljit_emit_fast_enter (the type of the value stored by this function
+   is sljit_p), and sljit_emit_fast_return can use this as a return value later. */
+
+/* Note: only for sljit-specific, non-ABI-compliant calls. Fast, since only a few machine
+   instructions are needed. Excellent for small utility functions, where saving registers
+   and setting up a new stack frame would cost too much performance. However, it is still
+   possible to return to the address of the caller (or anywhere else). */
+
+/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */
+
+/* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested,
+   since many architectures do clever branch prediction on call / return instruction pairs. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw);
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw);
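+
+/* A sketch of the fast calling mechanism (the register choice is arbitrary;
+   the utility code must keep the return address in R1 intact):
+
+     struct sljit_label *helper = sljit_emit_label(compiler);
+     sljit_emit_fast_enter(compiler, SLJIT_R1, 0);    return address -> R1
+     ... utility code ...
+     sljit_emit_fast_return(compiler, SLJIT_R1, 0);
+
+     struct sljit_jump *call = sljit_emit_jump(compiler, SLJIT_FAST_CALL);
+     sljit_set_label(call, helper);
+*/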
+
+/*
+   Source and destination values for arithmetical instructions
+    imm              - a simple immediate value (cannot be used as a destination)
+    reg              - any of the registers (immediate argument must be 0)
+    [imm]            - absolute immediate memory address
+    [reg+imm]        - indirect memory address
+    [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3)
+                       useful for (byte, half, int, sljit_sw) array access
+                       (fully supported by both x86 and ARM architectures, and cheap operation on others)
+*/
+
+/*
+   IMPORTANT NOTE: memory accesses MUST be naturally aligned unless
+                   the SLJIT_UNALIGNED macro is defined and its value is 1.
+
+     length | alignment
+   ---------+-----------
+     byte   | 1 byte (any physical_address is accepted)
+     half   | 2 byte (physical_address & 0x1 == 0)
+     int    | 4 byte (physical_address & 0x3 == 0)
+     word   | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1
+            | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1
+    pointer | size of sljit_p type (4 byte on 32 bit machines, 4 or 8 byte
+            | on 64 bit machines)
+
+   Note:   Different architectures have different addressing limitations.
+           A single instruction is enough for the following addressing
+           modes. Other addressing modes are emulated by instruction
+           sequences. This information can help to improve those code
+           generators which focus on only a few architectures.
+
+   x86:    [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32)
+           [reg+(reg<<imm)] is supported
+           [imm], -2^32+1 <= imm <= 2^32-1 is supported
+           Write-back is not supported
+   arm:    [reg+imm], -4095 <= imm <= 4095, or -255 <= imm <= 255 for signed
+                bytes, any halves, or floating point values
+           [reg+(reg<<imm)] is supported
+           Write-back is supported
+   arm-t2: [reg+imm], -255 <= imm <= 4095
+           [reg+(reg<<imm)] is supported
+           Write back is supported only for [reg+imm], where -255 <= imm <= 255
+   ppc:    [reg+imm], -65536 <= imm <= 65535. 64 bit loads/stores and 32 bit
+                signed load on 64 bit requires immediates divisible by 4.
+                [reg+imm] is not supported for signed 8 bit values.
+           [reg+reg] is supported
+           Write-back is supported except for one instruction: 32 bit signed
+                load with [reg+imm] addressing mode on 64 bit.
+   mips:   [reg+imm], -65536 <= imm <= 65535
+   sparc:  [reg+imm], -4096 <= imm <= 4095
+           [reg+reg] is supported
+*/
+
+/* Register output: simply the name of the register.
+   For destination, you can use SLJIT_UNUSED as well. */
+#define SLJIT_MEM              0x80
+#define SLJIT_MEM0()           (SLJIT_MEM)
+#define SLJIT_MEM1(r1)         (SLJIT_MEM | (r1))
+#define SLJIT_MEM2(r1, r2)     (SLJIT_MEM | (r1) | ((r2) << 8))
+#define SLJIT_IMM              0x40
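+
+/* Addressing sketches (assuming S0 holds the base address of a 32 bit int
+   array and R1 holds an index; the registers are arbitrary):
+
+     [reg+imm]:       SLJIT_MEM1(SLJIT_S0), 2 * (sljit_sw)sizeof(sljit_si)
+     [reg+(reg<<2)]:  SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 2
+
+   e.g. loading base[index] into R0 (SLJIT_MOV_SI is declared below):
+
+     sljit_emit_op1(c, SLJIT_MOV_SI, SLJIT_R0, 0,
+         SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 2);
+*/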
+
+/* Set 32 bit operation mode (I) on 64 bit CPUs. The flag is totally ignored on
+   32 bit CPUs. If this flag is set for an arithmetic operation, it uses only the
+   lower 32 bits of the input register(s), and sets the CPU status flags according
+   to the 32 bit result. The higher 32 bits are undefined for both the input and
+   output. However, the CPU might not ignore those higher 32 bits; MIPS, for example,
+   expects them to be the sign extension of the lower 32 bits. All 32 bit operations
+   are undefined if this condition is not fulfilled. Therefore, when SLJIT_INT_OP
+   is specified, all register arguments must be the result of other operations with
+   the same SLJIT_INT_OP flag. In other words, although a register can hold either
+   a 64 or 32 bit value, these values cannot be mixed. The only exceptions are
+   SLJIT_IMOV and SLJIT_IMOVU (SLJIT_MOV_SI/SLJIT_MOVU_SI with SLJIT_INT_OP flag)
+   which can convert any source argument to SLJIT_INT_OP compatible result. This
+   conversion might be unnecessary on some CPUs like x86-64, since the upper 32
+   bit is always ignored. In this case SLJIT is clever enough to not generate any
+   instructions if the source and destination operands are the same registers.
+   Affects sljit_emit_op0, sljit_emit_op1 and sljit_emit_op2. */
+#define SLJIT_INT_OP           0x100
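+
+/* For example (a sketch; SLJIT_IMOV and SLJIT_IADD are defined below as the
+   SLJIT_INT_OP forms of SLJIT_MOV_SI and SLJIT_ADD): load a 32 bit value
+   and increment it as a 32 bit operation:
+
+     sljit_emit_op1(c, SLJIT_IMOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), 0);
+     sljit_emit_op2(c, SLJIT_IADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1);
+*/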
+
+/* Single precision mode (SP). This flag is similar to SLJIT_INT_OP, except
+   that it applies to floating point registers (it is even the same bit). When
+   this flag is passed, the CPU performs single precision floating point
+   operations. Similar to SLJIT_INT_OP, all register arguments must be the
+   result of other floating point operations with this flag. Affects
+   sljit_emit_fop1, sljit_emit_fop2 and sljit_emit_fcmp. */
+#define SLJIT_SINGLE_OP                0x100
+
+/* Common CPU status flags for all architectures (x86, ARM, PPC)
+    - carry flag
+    - overflow flag
+    - zero flag
+    - negative/positive flag (depends on architecture)
+   On mips, these flags are emulated by software. */
+
+/* By default, the instructions may or may not set the CPU status flags.
+   Forcing the status flags to be set or kept can be done with the following flags: */
+
+/* Note: sljit tries to emit the minimum number of instructions. Using these
+   flags can increase the instruction count, so use them wisely to avoid unnecessary code generation. */
+
+/* Set Equal (Zero) status flag (E). */
+#define SLJIT_SET_E                    0x0200
+/* Set unsigned status flag (U). */
+#define SLJIT_SET_U                    0x0400
+/* Set signed status flag (S). */
+#define SLJIT_SET_S                    0x0800
+/* Set signed overflow flag (O). */
+#define SLJIT_SET_O                    0x1000
+/* Set carry flag (C).
+   Note: A kind of unsigned overflow, but behaves differently on various CPUs.
+#define SLJIT_SET_C                    0x2000
+/* Do not modify the flags (K).
+   Note: This flag cannot be combined with any other SLJIT_SET_* flag. */
+#define SLJIT_KEEP_FLAGS               0x4000
+
+/* Notes:
+     - you cannot postpone conditional jump instructions unless it is noted that
+       the instruction does not set flags (see: SLJIT_KEEP_FLAGS).
+     - flag combinations: '|' means 'logical or'. */
+
+/* Starting index of opcodes for sljit_emit_op0. */
+#define SLJIT_OP0_BASE                 0
+
+/* Flags: - (never set any flags)
+   Note: the breakpoint instruction is not supported by all architectures (namely ppc).
+         It falls back to SLJIT_NOP in those cases. */
+#define SLJIT_BREAKPOINT               (SLJIT_OP0_BASE + 0)
+/* Flags: - (never set any flags)
+   Note: may or may not cause an extra cycle wait;
+         it can even decrease the runtime in a few cases. */
+#define SLJIT_NOP                      (SLJIT_OP0_BASE + 1)
+/* Flags: - (may destroy flags)
+   Unsigned multiplication of SLJIT_R0 and SLJIT_R1.
+   Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */
+#define SLJIT_LUMUL                    (SLJIT_OP0_BASE + 2)
+/* Flags: - (may destroy flags)
+   Signed multiplication of SLJIT_R0 and SLJIT_R1.
+   Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */
+#define SLJIT_LSMUL                    (SLJIT_OP0_BASE + 3)
+/* Flags: I - (may destroy flags)
+   Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+   The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1.
+   Note: if SLJIT_R1 contains 0, the behaviour is undefined. */
+#define SLJIT_LUDIV                    (SLJIT_OP0_BASE + 4)
+#define SLJIT_ILUDIV                   (SLJIT_LUDIV | SLJIT_INT_OP)
+/* Flags: I - (may destroy flags)
+   Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+   The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1.
+   Note: if SLJIT_R1 contains 0, the behaviour is undefined. */
+#define SLJIT_LSDIV                    (SLJIT_OP0_BASE + 5)
+#define SLJIT_ILSDIV                   (SLJIT_LSDIV | SLJIT_INT_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op);
+
+/* Starting index of opcodes for sljit_emit_op1. */
+#define SLJIT_OP1_BASE                 32
+
+/* Notes for MOV instructions:
+   U = Mov with update (pre form). If source or destination defined as SLJIT_MEM1(r1)
+       or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument
+   UB = unsigned byte (8 bit)
+   SB = signed byte (8 bit)
+   UH = unsigned half (16 bit)
+   SH = signed half (16 bit)
+   UI = unsigned int (32 bit)
+   SI = signed int (32 bit)
+   P  = pointer (sljit_p) size */
+
+/* Flags: - (never set any flags) */
+#define SLJIT_MOV                      (SLJIT_OP1_BASE + 0)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_UB                   (SLJIT_OP1_BASE + 1)
+#define SLJIT_IMOV_UB                  (SLJIT_MOV_UB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_SB                   (SLJIT_OP1_BASE + 2)
+#define SLJIT_IMOV_SB                  (SLJIT_MOV_SB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_UH                   (SLJIT_OP1_BASE + 3)
+#define SLJIT_IMOV_UH                  (SLJIT_MOV_UH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOV_SH                   (SLJIT_OP1_BASE + 4)
+#define SLJIT_IMOV_SH                  (SLJIT_MOV_SH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOV_UI                   (SLJIT_OP1_BASE + 5)
+/* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOV. */
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOV_SI                   (SLJIT_OP1_BASE + 6)
+#define SLJIT_IMOV                     (SLJIT_MOV_SI | SLJIT_INT_OP)
+/* Flags: - (never set any flags) */
+#define SLJIT_MOV_P                    (SLJIT_OP1_BASE + 7)
+/* Flags: - (never set any flags) */
+#define SLJIT_MOVU                     (SLJIT_OP1_BASE + 8)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_UB                  (SLJIT_OP1_BASE + 9)
+#define SLJIT_IMOVU_UB                 (SLJIT_MOVU_UB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_SB                  (SLJIT_OP1_BASE + 10)
+#define SLJIT_IMOVU_SB                 (SLJIT_MOVU_SB | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_UH                  (SLJIT_OP1_BASE + 11)
+#define SLJIT_IMOVU_UH                 (SLJIT_MOVU_UH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags) */
+#define SLJIT_MOVU_SH                  (SLJIT_OP1_BASE + 12)
+#define SLJIT_IMOVU_SH                 (SLJIT_MOVU_SH | SLJIT_INT_OP)
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOVU_UI                  (SLJIT_OP1_BASE + 13)
+/* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOVU. */
+/* Flags: I - (never set any flags)
+   Note: see SLJIT_INT_OP for further details. */
+#define SLJIT_MOVU_SI                  (SLJIT_OP1_BASE + 14)
+#define SLJIT_IMOVU                    (SLJIT_MOVU_SI | SLJIT_INT_OP)
+/* Flags: - (never set any flags) */
+#define SLJIT_MOVU_P                   (SLJIT_OP1_BASE + 15)
+/* Flags: I | E | K */
+#define SLJIT_NOT                      (SLJIT_OP1_BASE + 16)
+#define SLJIT_INOT                     (SLJIT_NOT | SLJIT_INT_OP)
+/* Flags: I | E | O | K */
+#define SLJIT_NEG                      (SLJIT_OP1_BASE + 17)
+#define SLJIT_INEG                     (SLJIT_NEG | SLJIT_INT_OP)
+/* Count leading zeroes
+   Flags: I | E | K
+   Important note! Sparc 32 does not support K flag, since
+   the required popc instruction is introduced only in sparc 64. */
+#define SLJIT_CLZ                      (SLJIT_OP1_BASE + 18)
+#define SLJIT_ICLZ                     (SLJIT_CLZ | SLJIT_INT_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw);
+
+/* Starting index of opcodes for sljit_emit_op2. */
+#define SLJIT_OP2_BASE                 96
+
+/* Flags: I | E | O | C | K */
+#define SLJIT_ADD                      (SLJIT_OP2_BASE + 0)
+#define SLJIT_IADD                     (SLJIT_ADD | SLJIT_INT_OP)
+/* Flags: I | C | K */
+#define SLJIT_ADDC                     (SLJIT_OP2_BASE + 1)
+#define SLJIT_IADDC                    (SLJIT_ADDC | SLJIT_INT_OP)
+/* Flags: I | E | U | S | O | C | K */
+#define SLJIT_SUB                      (SLJIT_OP2_BASE + 2)
+#define SLJIT_ISUB                     (SLJIT_SUB | SLJIT_INT_OP)
+/* Flags: I | C | K */
+#define SLJIT_SUBC                     (SLJIT_OP2_BASE + 3)
+#define SLJIT_ISUBC                    (SLJIT_SUBC | SLJIT_INT_OP)
+/* Note: integer mul
+   Flags: I | O (see SLJIT_C_MUL_*) | K */
+#define SLJIT_MUL                      (SLJIT_OP2_BASE + 4)
+#define SLJIT_IMUL                     (SLJIT_MUL | SLJIT_INT_OP)
+/* Flags: I | E | K */
+#define SLJIT_AND                      (SLJIT_OP2_BASE + 5)
+#define SLJIT_IAND                     (SLJIT_AND | SLJIT_INT_OP)
+/* Flags: I | E | K */
+#define SLJIT_OR                       (SLJIT_OP2_BASE + 6)
+#define SLJIT_IOR                      (SLJIT_OR | SLJIT_INT_OP)
+/* Flags: I | E | K */
+#define SLJIT_XOR                      (SLJIT_OP2_BASE + 7)
+#define SLJIT_IXOR                     (SLJIT_XOR | SLJIT_INT_OP)
+/* Flags: I | E | K
+   Let bit_length be the length of the shift operation: 32 or 64.
+   If src2 is immediate, src2w is masked by (bit_length - 1).
+   Otherwise, if the content of src2 is outside the range from 0
+   to bit_length - 1, the result is undefined. */
+#define SLJIT_SHL                      (SLJIT_OP2_BASE + 8)
+#define SLJIT_ISHL                     (SLJIT_SHL | SLJIT_INT_OP)
+/* Flags: I | E | K
+   Let bit_length be the length of the shift operation: 32 or 64.
+   If src2 is immediate, src2w is masked by (bit_length - 1).
+   Otherwise, if the content of src2 is outside the range from 0
+   to bit_length - 1, the result is undefined. */
+#define SLJIT_LSHR                     (SLJIT_OP2_BASE + 9)
+#define SLJIT_ILSHR                    (SLJIT_LSHR | SLJIT_INT_OP)
+/* Flags: I | E | K
+   Let bit_length be the length of the shift operation: 32 or 64.
+   If src2 is immediate, src2w is masked by (bit_length - 1).
+   Otherwise, if the content of src2 is outside the range from 0
+   to bit_length - 1, the result is undefined. */
+#define SLJIT_ASHR                     (SLJIT_OP2_BASE + 10)
+#define SLJIT_IASHR                    (SLJIT_ASHR | SLJIT_INT_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns the real machine register index (>= 0) of any SLJIT_R,
+   SLJIT_S and SLJIT_SP register.
+
+   Note: it returns -1 for virtual registers (only on x86-32). */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns the real machine register index of any SLJIT_FR or SLJIT_FS register.
+
+   Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
+
+/* Any instruction can be inserted into the instruction stream by
+   sljit_emit_op_custom. It has a similar purpose as inline assembly.
+   The size parameter must match the instruction size of the target
+   architecture:
+
+         x86: 0 < size <= 15. The instruction argument can be byte aligned.
+      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
+              if size == 4, the instruction argument must be 4 byte aligned.
+   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size);
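+
+/* For example, on x86 a single one-byte NOP (0x90) can be inserted directly
+   into the instruction stream (a sketch; the encoding is architecture specific):
+
+     sljit_ub nop = 0x90;
+     sljit_emit_op_custom(compiler, &nop, 1);
+*/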
+
+/* Returns non-zero if an FPU is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
+
+/* Starting index of opcodes for sljit_emit_fop1. */
+#define SLJIT_FOP1_BASE                        128
+
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DMOV                     (SLJIT_FOP1_BASE + 0)
+#define SLJIT_SMOV                     (SLJIT_DMOV | SLJIT_SINGLE_OP)
+/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
+   SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int
+   Rounding mode when the destination is W or I: round towards zero. */
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMS              (SLJIT_FOP1_BASE + 1)
+#define SLJIT_CONVS_FROMD              (SLJIT_CONVD_FROMS | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVW_FROMD              (SLJIT_FOP1_BASE + 2)
+#define SLJIT_CONVW_FROMS              (SLJIT_CONVW_FROMD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVI_FROMD              (SLJIT_FOP1_BASE + 3)
+#define SLJIT_CONVI_FROMS              (SLJIT_CONVI_FROMD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMW              (SLJIT_FOP1_BASE + 4)
+#define SLJIT_CONVS_FROMW              (SLJIT_CONVD_FROMW | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMI              (SLJIT_FOP1_BASE + 5)
+#define SLJIT_CONVS_FROMI              (SLJIT_CONVD_FROMI | SLJIT_SINGLE_OP)
+/* Note: dst is the left and src is the right operand for SLJIT_DCMP.
+   Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag
+         is set, the comparison result is unpredictable.
+   Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
+#define SLJIT_DCMP                     (SLJIT_FOP1_BASE + 6)
+#define SLJIT_SCMP                     (SLJIT_DCMP | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DNEG                     (SLJIT_FOP1_BASE + 7)
+#define SLJIT_SNEG                     (SLJIT_DNEG | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DABS                     (SLJIT_FOP1_BASE + 8)
+#define SLJIT_SABS                     (SLJIT_DABS | SLJIT_SINGLE_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw);
+
+/* Starting index of opcodes for sljit_emit_fop2. */
+#define SLJIT_FOP2_BASE                        160
+
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DADD                     (SLJIT_FOP2_BASE + 0)
+#define SLJIT_SADD                     (SLJIT_DADD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DSUB                     (SLJIT_FOP2_BASE + 1)
+#define SLJIT_SSUB                     (SLJIT_DSUB | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DMUL                     (SLJIT_FOP2_BASE + 2)
+#define SLJIT_SMUL                     (SLJIT_DMUL | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_DDIV                     (SLJIT_FOP2_BASE + 3)
+#define SLJIT_SDIV                     (SLJIT_DDIV | SLJIT_SINGLE_OP)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* Label and jump instructions. */
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler);
+
+/* Invert (negate) conditional type: xor (^) with 0x1 */
+
+/* Integer comparison types. */
+#define SLJIT_EQUAL                    0
+#define SLJIT_I_EQUAL                  (SLJIT_EQUAL | SLJIT_INT_OP)
+#define SLJIT_ZERO                     0
+#define SLJIT_I_ZERO                   (SLJIT_ZERO | SLJIT_INT_OP)
+#define SLJIT_NOT_EQUAL                        1
+#define SLJIT_I_NOT_EQUAL              (SLJIT_NOT_EQUAL | SLJIT_INT_OP)
+#define SLJIT_NOT_ZERO                 1
+#define SLJIT_I_NOT_ZERO               (SLJIT_NOT_ZERO | SLJIT_INT_OP)
+
+#define SLJIT_LESS                     2
+#define SLJIT_I_LESS                   (SLJIT_LESS | SLJIT_INT_OP)
+#define SLJIT_GREATER_EQUAL            3
+#define SLJIT_I_GREATER_EQUAL          (SLJIT_GREATER_EQUAL | SLJIT_INT_OP)
+#define SLJIT_GREATER                  4
+#define SLJIT_I_GREATER                        (SLJIT_GREATER | SLJIT_INT_OP)
+#define SLJIT_LESS_EQUAL               5
+#define SLJIT_I_LESS_EQUAL             (SLJIT_LESS_EQUAL | SLJIT_INT_OP)
+#define SLJIT_SIG_LESS                 6
+#define SLJIT_I_SIG_LESS               (SLJIT_SIG_LESS | SLJIT_INT_OP)
+#define SLJIT_SIG_GREATER_EQUAL                7
+#define SLJIT_I_SIG_GREATER_EQUAL      (SLJIT_SIG_GREATER_EQUAL | SLJIT_INT_OP)
+#define SLJIT_SIG_GREATER              8
+#define SLJIT_I_SIG_GREATER            (SLJIT_SIG_GREATER | SLJIT_INT_OP)
+#define SLJIT_SIG_LESS_EQUAL           9
+#define SLJIT_I_SIG_LESS_EQUAL         (SLJIT_SIG_LESS_EQUAL | SLJIT_INT_OP)
+
+#define SLJIT_OVERFLOW                 10
+#define SLJIT_I_OVERFLOW               (SLJIT_OVERFLOW | SLJIT_INT_OP)
+#define SLJIT_NOT_OVERFLOW             11
+#define SLJIT_I_NOT_OVERFLOW           (SLJIT_NOT_OVERFLOW | SLJIT_INT_OP)
+
+#define SLJIT_MUL_OVERFLOW             12
+#define SLJIT_I_MUL_OVERFLOW           (SLJIT_MUL_OVERFLOW | SLJIT_INT_OP)
+#define SLJIT_MUL_NOT_OVERFLOW         13
+#define SLJIT_I_MUL_NOT_OVERFLOW       (SLJIT_MUL_NOT_OVERFLOW | SLJIT_INT_OP)
+
+/* Floating point comparison types. */
+#define SLJIT_D_EQUAL                  14
+#define SLJIT_S_EQUAL                  (SLJIT_D_EQUAL | SLJIT_SINGLE_OP)
+#define SLJIT_D_NOT_EQUAL              15
+#define SLJIT_S_NOT_EQUAL              (SLJIT_D_NOT_EQUAL | SLJIT_SINGLE_OP)
+#define SLJIT_D_LESS                   16
+#define SLJIT_S_LESS                   (SLJIT_D_LESS | SLJIT_SINGLE_OP)
+#define SLJIT_D_GREATER_EQUAL          17
+#define SLJIT_S_GREATER_EQUAL          (SLJIT_D_GREATER_EQUAL | SLJIT_SINGLE_OP)
+#define SLJIT_D_GREATER                        18
+#define SLJIT_S_GREATER                        (SLJIT_D_GREATER | SLJIT_SINGLE_OP)
+#define SLJIT_D_LESS_EQUAL             19
+#define SLJIT_S_LESS_EQUAL             (SLJIT_D_LESS_EQUAL | SLJIT_SINGLE_OP)
+#define SLJIT_D_UNORDERED              20
+#define SLJIT_S_UNORDERED              (SLJIT_D_UNORDERED | SLJIT_SINGLE_OP)
+#define SLJIT_D_ORDERED                        21
+#define SLJIT_S_ORDERED                        (SLJIT_D_ORDERED | SLJIT_SINGLE_OP)
+
+/* Unconditional jump types. */
+#define SLJIT_JUMP                     22
+#define SLJIT_FAST_CALL                        23
+#define SLJIT_CALL0                    24
+#define SLJIT_CALL1                    25
+#define SLJIT_CALL2                    26
+#define SLJIT_CALL3                    27
+
+/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */
+
+/* The target can be changed during runtime (see: sljit_set_jump_addr). */
+#define SLJIT_REWRITABLE_JUMP          0x1000
+
+/* Emit a jump instruction. The destination is not set, only the type of the jump.
+    type must be between SLJIT_EQUAL and SLJIT_CALL3
+    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+   Flags: - (never set any flags) for both conditional and unconditional jumps.
+   Flags: destroy all flags for calls. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type);
+
+/* Basic arithmetic comparison. In most architectures it is implemented as
+   an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting
+   appropriate flags) followed by a sljit_emit_jump. However some
+   architectures (e.g. ARM64 or MIPS) may employ special optimizations here.
+   It is suggested to use this comparison form when appropriate.
+    type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL
+    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+   Flags: destroy flags. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* Basic floating point comparison. On most architectures it is implemented
+   as an SLJIT_FCMP operation (setting appropriate flags) followed by a
+   sljit_emit_jump. However, some architectures (e.g. MIPS) may employ
+   special optimizations here. It is suggested to use this comparison form
+   when appropriate.
+    type must be between SLJIT_D_EQUAL and SLJIT_S_ORDERED
+    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+   Flags: destroy flags.
+   Note: if either operand is NaN, the behaviour is undefined for
+         types up to SLJIT_S_LESS_EQUAL. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+/* Set the destination of the jump to this label. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label);
+/* Set the destination address of the jump to this label. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
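+
+/* A minimal usage sketch (illustrative only, not part of the API): branch
+   over one instruction when R0 is zero. It assumes the sljit_emit_label
+   and sljit_emit_op1 declarations from earlier in this header:
+
+     struct sljit_jump *jump;
+     jump = sljit_emit_cmp(compiler, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);
+     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 1);
+     sljit_set_label(jump, sljit_emit_label(compiler));
+*/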
+
+/* Call a function or jump anywhere. Both direct and indirect forms are supported.
+    type must be between SLJIT_JUMP and SLJIT_CALL3
+    Direct form: set src to SLJIT_IMM() and srcw to the address
+    Indirect form: any other valid addressing mode
+   Flags: - (never set any flags) for unconditional jumps.
+   Flags: destroy all flags for calls. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw);
+
+/* Perform the operation using the conditional flags as the second argument.
+   Type must always be between SLJIT_EQUAL and SLJIT_S_ORDERED. The value
+   represented by the type is 1 if the condition represented by the type
+   is fulfilled, and 0 otherwise.
+
+   If op == SLJIT_MOV, SLJIT_MOV_SI, SLJIT_MOV_UI:
+     Set dst to the value represented by the type (0 or 1).
+     Src must be SLJIT_UNUSED, and srcw must be 0
+     Flags: - (never set any flags)
+   If op == SLJIT_OR, op == SLJIT_AND, op == SLJIT_XOR
+     Performs the binary operation using src as the first, and the value
+     represented by type as the second argument.
+     Important note: only dst=src and dstw=srcw are supported at the moment!
+     Flags: I | E | K
+   Note: sljit_emit_op_flags does nothing if dst is SLJIT_UNUSED (regardless of op). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type);
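+
+/* An illustrative sketch: materialize (R1 < R2) as 0 or 1 in R0. It assumes
+   SLJIT_SET_U (declared earlier in this header) requests the unsigned
+   comparison flags, and that sljit_emit_op2 is declared above:
+
+     sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0,
+             SLJIT_R1, 0, SLJIT_R2, 0);
+     sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0,
+             SLJIT_UNUSED, 0, SLJIT_LESS);
+*/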
+
+/* Copies the base address of SLJIT_SP + offset to dst.
+   Flags: - (never set any flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset);
+
+/* The constant can be changed at runtime (see: sljit_set_const)
+   Flags: - (never set any flags) */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value);
+
+/* After code generation, the addresses of label, jump and const instructions
+   are computed. Since these structures are freed by sljit_free_compiler, the
+   addresses must be preserved by the user program elsewhere. */
+static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->addr; }
+static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; }
+static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; }
+
+/* Only the address is required to rewrite the code. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant);
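+
+/* An illustrative sketch of runtime patching (error handling omitted):
+   emit a patchable constant load, save its address before the compiler
+   is freed, then rewrite the generated code in place:
+
+     struct sljit_const *const_ = sljit_emit_const(compiler, SLJIT_R0, 0, 42);
+     void *code = sljit_generate_code(compiler);
+     sljit_uw addr = sljit_get_const_addr(const_);
+     sljit_free_compiler(compiler);
+     sljit_set_const(addr, 100);   now the generated code loads 100
+*/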
+
+/* --------------------------------------------------------------------- */
+/*  Miscellaneous utility functions                                      */
+/* --------------------------------------------------------------------- */
+
+#define SLJIT_MAJOR_VERSION    0
+#define SLJIT_MINOR_VERSION    93
+
+/* Get the human-readable name of the platform. Useful on platforms like
+   ARM, where ARM and Thumb2 functions can be mixed, and knowing which
+   code generator is in use can matter. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void);
+
+/* Portable helper macro to get the offset of a struct member. */
+#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10)
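+
+/* For example (hypothetical struct, illustrative only), with
+   struct point { sljit_sw x; sljit_sw y; } and its address in S0,
+   the y member could be loaded as:
+
+     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0,
+             SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point, y));
+*/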
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+/* This global lock is useful to compile common functions. */
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void);
+#endif
+
+#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
+
+/* The sljit_stack is a utility feature of sljit, which allocates a
+   writable memory region between base (inclusive) and limit (exclusive).
+   Both base and limit are pointers, and base is always <= limit.
+   This feature uses the "address space reserve" feature
+   of modern operating systems. Basically we don't need to allocate a
+   huge memory block in one step for the worst case; we can start with
+   a smaller chunk and extend it later. Since the address space is
+   reserved, the data is never copied to other regions, thus it is safe
+   to store pointers here. */
+
+/* Note: The base field is aligned to PAGE_SIZE bytes (usually 4k or more).
+   Note: stack growing should not happen in small steps: 4k, 16k or even
+     bigger growth is better.
+   Note: this structure may not be supported by all operating systems.
+     Some kind of fallback mechanism is suggested when SLJIT_UTIL_STACK
+     is not defined. */
+
+struct sljit_stack {
+       /* User data; anything can be stored here.
+          Initialized to the same value as base. */
+       sljit_uw top;
+       /* These members are read only. */
+       sljit_uw base;
+       sljit_uw limit;
+       sljit_uw max_limit;
+};
+
+/* Returns NULL if unsuccessful.
+   Note: limit and max_limit contain the sizes for stack allocation.
+   Note: the top field is initialized to base.
+   Note: see sljit_create_compiler for the explanation of allocator_data. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data);
+
+/* Can be used to increase (allocate) or decrease (free) the memory area.
+   Returns with a non-zero value if unsuccessful. If new_limit is greater than
+   max_limit, it will fail. It is very easy to implement a stack data structure,
+   since the growth ratio can be added to the current limit, and sljit_stack_resize
+   will do all the necessary checks. The fields of the stack are not changed if
+   sljit_stack_resize fails. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_uw new_limit);
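+
+/* An illustrative sketch: double the committed area of a stack, relying on
+   the guarantee above that the fields stay unchanged when the resize fails
+   (names as declared above, error handling kept minimal):
+
+     struct sljit_stack *stack = sljit_allocate_stack(8192, 65536, NULL);
+     sljit_uw new_limit = stack->limit + (stack->limit - stack->base);
+     if (sljit_stack_resize(stack, new_limit))
+             ...fields are unchanged, fall back or report the error...
+*/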
+
+#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */
+
+#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+
+/* Get the entry address of a given function. */
+#define SLJIT_FUNC_OFFSET(func_name)   ((sljit_sw)func_name)
+
+#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+
+/* All JIT related code should be placed in the same context (library, binary, etc.). */
+
+#define SLJIT_FUNC_OFFSET(func_name)   (*(sljit_sw*)(void*)func_name)
+
+/* For powerpc64, the function pointers point to a context descriptor. */
+struct sljit_function_context {
+       sljit_sw addr;
+       sljit_sw r2;
+       sljit_sw r11;
+};
+
+/* Fill the context arguments using the addr and the function.
+   If func_ptr is NULL, it will not be set to the address of the context.
+   If addr is NULL, the function address also comes from the func pointer. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func);
+
+#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+
+#endif /* _SLJIT_LIR_H_ */
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c
new file mode 100644 (file)
index 0000000..aca1d31
--- /dev/null
@@ -0,0 +1,2551 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+       return "ARMv7" SLJIT_CPUINFO;
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       return "ARMv5" SLJIT_CPUINFO;
+#else
+#error "Internal error: Unknown ARM architecture"
+#endif
+}
+
+/* Last register + 1. */
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_PC         (SLJIT_NUMBER_OF_REGISTERS + 5)
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+
+/* In ARM instruction words.
+   Cache lines are usually 32-byte aligned. */
+#define CONST_POOL_ALIGNMENT   8
+#define CONST_POOL_EMPTY       0xffffffff
+
+#define ALIGN_INSTRUCTION(ptr) \
+       (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
+#define MAX_DIFFERENCE(max_diff) \
+       (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
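+
+/* For example, MAX_DIFFERENCE(4092) == 4092 / 4 - 7 == 1016 words: the
+   farthest a pc-relative load with a 12-bit offset can reach once the
+   worst-case pool alignment padding is subtracted. */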
+
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+       0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15
+};
+
+#define RM(rm) (reg_map[rm])
+#define RD(rd) (reg_map[rd] << 12)
+#define RN(rn) (reg_map[rn] << 16)
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* The instruction includes the AL condition.
+   INST_NAME - CONDITIONAL removes this flag. */
+#define COND_MASK      0xf0000000
+#define CONDITIONAL    0xe0000000
+#define PUSH_POOL      0xff000000
+
+/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
+#define ADC_DP         0x5
+#define ADD_DP         0x4
+#define AND_DP         0x0
+#define B              0xea000000
+#define BIC_DP         0xe
+#define BL             0xeb000000
+#define BLX            0xe12fff30
+#define BX             0xe12fff10
+#define CLZ            0xe16f0f10
+#define CMP_DP         0xa
+#define BKPT           0xe1200070
+#define EOR_DP         0x1
+#define MOV_DP         0xd
+#define MUL            0xe0000090
+#define MVN_DP         0xf
+#define NOP            0xe1a00000
+#define ORR_DP         0xc
+#define PUSH           0xe92d0000
+#define POP            0xe8bd0000
+#define RSB_DP         0x3
+#define RSC_DP         0x7
+#define SBC_DP         0x6
+#define SMULL          0xe0c00090
+#define SUB_DP         0x2
+#define UMULL          0xe0800090
+#define VABS_F32       0xeeb00ac0
+#define VADD_F32       0xee300a00
+#define VCMP_F32       0xeeb40a40
+#define VCVT_F32_S32   0xeeb80ac0
+#define VCVT_F64_F32   0xeeb70ac0
+#define VCVT_S32_F32   0xeebd0ac0
+#define VDIV_F32       0xee800a00
+#define VMOV_F32       0xeeb00a40
+#define VMOV           0xee000a10
+#define VMRS           0xeef1fa10
+#define VMUL_F32       0xee200a00
+#define VNEG_F32       0xeeb10a40
+#define VSTR_F32       0xed000a00
+#define VSUB_F32       0xee300a40
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+/* Arm v7 specific instructions. */
+#define MOVW           0xe3000000
+#define MOVT           0xe3400000
+#define SXTB           0xe6af0070
+#define SXTH           0xe6bf0070
+#define UXTB           0xe6ef0070
+#define UXTH           0xe6ff0070
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+
+static sljit_si push_cpool(struct sljit_compiler *compiler)
+{
+       /* Pushing the constant pool into the instruction stream. */
+       sljit_uw* inst;
+       sljit_uw* cpool_ptr;
+       sljit_uw* cpool_end;
+       sljit_si i;
+
+       /* The label could point to the address after the constant pool. */
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
+
+       SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
+       inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!inst);
+       compiler->size++;
+       *inst = 0xff000000 | compiler->cpool_fill;
+
+       for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
+               inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+               FAIL_IF(!inst);
+               compiler->size++;
+               *inst = 0;
+       }
+
+       cpool_ptr = compiler->cpool;
+       cpool_end = cpool_ptr + compiler->cpool_fill;
+       while (cpool_ptr < cpool_end) {
+               inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+               FAIL_IF(!inst);
+               compiler->size++;
+               *inst = *cpool_ptr++;
+       }
+       compiler->cpool_diff = CONST_POOL_EMPTY;
+       compiler->cpool_fill = 0;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+{
+       sljit_uw* ptr;
+
+       if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
+               FAIL_IF(push_cpool(compiler));
+
+       ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!ptr);
+       compiler->size++;
+       *ptr = inst;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+{
+       sljit_uw* ptr;
+       sljit_uw cpool_index = CPOOL_SIZE;
+       sljit_uw* cpool_ptr;
+       sljit_uw* cpool_end;
+       sljit_ub* cpool_unique_ptr;
+
+       if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
+               FAIL_IF(push_cpool(compiler));
+       else if (compiler->cpool_fill > 0) {
+               cpool_ptr = compiler->cpool;
+               cpool_end = cpool_ptr + compiler->cpool_fill;
+               cpool_unique_ptr = compiler->cpool_unique;
+               do {
+                       if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
+                               cpool_index = cpool_ptr - compiler->cpool;
+                               break;
+                       }
+                       cpool_ptr++;
+                       cpool_unique_ptr++;
+               } while (cpool_ptr < cpool_end);
+       }
+
+       if (cpool_index == CPOOL_SIZE) {
+               /* Must allocate a new entry in the literal pool. */
+               if (compiler->cpool_fill < CPOOL_SIZE) {
+                       cpool_index = compiler->cpool_fill;
+                       compiler->cpool_fill++;
+               }
+               else {
+                       FAIL_IF(push_cpool(compiler));
+                       cpool_index = 0;
+                       compiler->cpool_fill = 1;
+               }
+       }
+
+       SLJIT_ASSERT((inst & 0xfff) == 0);
+       ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!ptr);
+       compiler->size++;
+       *ptr = inst | cpool_index;
+
+       compiler->cpool[cpool_index] = literal;
+       compiler->cpool_unique[cpool_index] = 0;
+       if (compiler->cpool_diff == CONST_POOL_EMPTY)
+               compiler->cpool_diff = compiler->size;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+{
+       sljit_uw* ptr;
+       if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
+               FAIL_IF(push_cpool(compiler));
+
+       SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
+       ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!ptr);
+       compiler->size++;
+       *ptr = inst | compiler->cpool_fill;
+
+       compiler->cpool[compiler->cpool_fill] = literal;
+       compiler->cpool_unique[compiler->cpool_fill] = 1;
+       compiler->cpool_fill++;
+       if (compiler->cpool_diff == CONST_POOL_EMPTY)
+               compiler->cpool_diff = compiler->size;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si prepare_blx(struct sljit_compiler *compiler)
+{
+       /* Ensure room for at least two instructions (it doesn't matter whether the first has a literal). */
+       if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
+               return push_cpool(compiler);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_blx(struct sljit_compiler *compiler)
+{
+       /* Must tightly follow the previous instruction (so it can be converted to a bl instruction). */
+       SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
+       return push_inst(compiler, BLX | RM(TMP_REG1));
+}
+
+static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
+{
+       sljit_uw diff;
+       sljit_uw ind;
+       sljit_uw counter = 0;
+       sljit_uw* clear_const_pool = const_pool;
+       sljit_uw* clear_const_pool_end = const_pool + cpool_size;
+
+       SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
+       /* Set the unused flag for all literals in the constant pool.
+          Unused literals can belong to branches, which can be encoded as B or BL.
+          We can "compress" the constant pool by discarding these literals. */
+       while (clear_const_pool < clear_const_pool_end)
+               *clear_const_pool++ = (sljit_uw)(-1);
+
+       while (last_pc_patch < code_ptr) {
+               /* Data transfer instruction with Rn == r15. */
+               if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
+                       diff = const_pool - last_pc_patch;
+                       ind = (*last_pc_patch) & 0xfff;
+
+                       /* Must be a load instruction with immediate offset. */
+                       SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
+                       if ((sljit_si)const_pool[ind] < 0) {
+                               const_pool[ind] = counter;
+                               ind = counter;
+                               counter++;
+                       }
+                       else
+                               ind = const_pool[ind];
+
+                       SLJIT_ASSERT(diff >= 1);
+                       if (diff >= 2 || ind > 0) {
+                               diff = (diff + ind - 2) << 2;
+                               SLJIT_ASSERT(diff <= 0xfff);
+                               *last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
+                       }
+                       else
+                               *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
+               }
+               last_pc_patch++;
+       }
+       return counter;
+}
+
+/* On some rare occasions we may need future patches. The probability is close to 0 in practice. */
+struct future_patch {
+       struct future_patch* next;
+       sljit_si index;
+       sljit_si value;
+};
+
+static sljit_si resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
+{
+       sljit_si value;
+       struct future_patch *curr_patch, *prev_patch;
+
+       SLJIT_UNUSED_ARG(compiler);
+
+       /* Using the values generated by patch_pc_relative_loads. */
+       if (!*first_patch)
+               value = (sljit_si)cpool_start_address[cpool_current_index];
+       else {
+               curr_patch = *first_patch;
+               prev_patch = 0;
+               while (1) {
+                       if (!curr_patch) {
+                               value = (sljit_si)cpool_start_address[cpool_current_index];
+                               break;
+                       }
+                       if ((sljit_uw)curr_patch->index == cpool_current_index) {
+                               value = curr_patch->value;
+                               if (prev_patch)
+                                       prev_patch->next = curr_patch->next;
+                               else
+                                       *first_patch = curr_patch->next;
+                               SLJIT_FREE(curr_patch, compiler->allocator_data);
+                               break;
+                       }
+                       prev_patch = curr_patch;
+                       curr_patch = curr_patch->next;
+               }
+       }
+
+       if (value >= 0) {
+               if ((sljit_uw)value > cpool_current_index) {
+                       curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
+                       if (!curr_patch) {
+                               while (*first_patch) {
+                                       curr_patch = *first_patch;
+                                       *first_patch = (*first_patch)->next;
+                                       SLJIT_FREE(curr_patch, compiler->allocator_data);
+                               }
+                               return SLJIT_ERR_ALLOC_FAILED;
+                       }
+                       curr_patch->next = *first_patch;
+                       curr_patch->index = value;
+                       curr_patch->value = cpool_start_address[value];
+                       *first_patch = curr_patch;
+               }
+               cpool_start_address[value] = *buf_ptr;
+       }
+       return SLJIT_SUCCESS;
+}
+
+#else
+
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+{
+       sljit_uw* ptr;
+
+       ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+       FAIL_IF(!ptr);
+       compiler->size++;
+       *ptr = inst;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_imm(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
+       return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
+}
+
+#endif
+
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
+{
+       sljit_sw diff;
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return 0;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       if (jump->flags & IS_BL)
+               code_ptr--;
+
+       if (jump->flags & JUMP_ADDR)
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2));
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
+       }
+
+       /* Branch to Thumb code has not been optimized yet. */
+       if (diff & 0x3)
+               return 0;
+
+       if (jump->flags & IS_BL) {
+               if (diff <= 0x01ffffff && diff >= -0x02000000) {
+                       *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
+                       jump->flags |= PATCH_B;
+                       return 1;
+               }
+       }
+       else {
+               if (diff <= 0x01ffffff && diff >= -0x02000000) {
+                       *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
+                       jump->flags |= PATCH_B;
+               }
+       }
+#else
+       if (jump->flags & JUMP_ADDR)
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr);
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
+       }
+
+       /* Branch to Thumb code has not been optimized yet. */
+       if (diff & 0x3)
+               return 0;
+
+       if (diff <= 0x01ffffff && diff >= -0x02000000) {
+               code_ptr -= 2;
+               *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
+               jump->flags |= PATCH_B;
+               return 1;
+       }
+#endif
+       return 0;
+}
+
+static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_si flush)
+{
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       sljit_uw *ptr = (sljit_uw*)addr;
+       sljit_uw *inst = (sljit_uw*)ptr[0];
+       sljit_uw mov_pc = ptr[1];
+       sljit_si bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
+       sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2);
+
+       if (diff <= 0x7fffff && diff >= -0x800000) {
+               /* Turn it into a branch. */
+               if (!bl) {
+                       inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
+                       if (flush) {
+                               SLJIT_CACHE_FLUSH(inst, inst + 1);
+                       }
+               } else {
+                       inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
+                       inst[1] = NOP;
+                       if (flush) {
+                               SLJIT_CACHE_FLUSH(inst, inst + 2);
+                       }
+               }
+       } else {
+               /* Get the position of the constant. */
+               if (mov_pc & (1 << 23))
+                       ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
+               else
+                       ptr = inst + 1;
+
+               if (*inst != mov_pc) {
+                       inst[0] = mov_pc;
+                       if (!bl) {
+                               if (flush) {
+                                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+                               }
+                       } else {
+                               inst[1] = BLX | RM(TMP_REG1);
+                               if (flush) {
+                                       SLJIT_CACHE_FLUSH(inst, inst + 2);
+                               }
+                       }
+               }
+               *ptr = new_addr;
+       }
+#else
+       sljit_uw *inst = (sljit_uw*)addr;
+       SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
+       inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
+       inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
+       if (flush) {
+               SLJIT_CACHE_FLUSH(inst, inst + 2);
+       }
+#endif
+}
+
+static sljit_uw get_imm(sljit_uw imm);
+
+static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_si flush)
+{
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       sljit_uw *ptr = (sljit_uw*)addr;
+       sljit_uw *inst = (sljit_uw*)ptr[0];
+       sljit_uw ldr_literal = ptr[1];
+       sljit_uw src2;
+
+       src2 = get_imm(new_constant);
+       if (src2) {
+               *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
+               if (flush) {
+                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+               }
+               return;
+       }
+
+       src2 = get_imm(~new_constant);
+       if (src2) {
+               *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
+               if (flush) {
+                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+               }
+               return;
+       }
+
+       if (ldr_literal & (1 << 23))
+               ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
+       else
+               ptr = inst + 1;
+
+       if (*inst != ldr_literal) {
+               *inst = ldr_literal;
+               if (flush) {
+                       SLJIT_CACHE_FLUSH(inst, inst + 1);
+               }
+       }
+       *ptr = new_constant;
+#else
+       sljit_uw *inst = (sljit_uw*)addr;
+       SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
+       inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
+       inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
+       if (flush) {
+               SLJIT_CACHE_FLUSH(inst, inst + 2);
+       }
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_uw *code;
+       sljit_uw *code_ptr;
+       sljit_uw *buf_ptr;
+       sljit_uw *buf_end;
+       sljit_uw size;
+       sljit_uw word_count;
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       sljit_uw cpool_size;
+       sljit_uw cpool_skip_alignment;
+       sljit_uw cpool_current_index;
+       sljit_uw *cpool_start_address;
+       sljit_uw *last_pc_patch;
+       struct future_patch *first_patch;
+#endif
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_generate_code(compiler));
+       reverse_buf(compiler);
+
+       /* Second code generation pass. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       size = compiler->size + (compiler->patches << 1);
+       if (compiler->cpool_fill > 0)
+               size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
+#else
+       size = compiler->size;
+#endif
+       code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       cpool_size = 0;
+       cpool_skip_alignment = 0;
+       cpool_current_index = 0;
+       cpool_start_address = NULL;
+       first_patch = NULL;
+       last_pc_patch = code;
+#endif
+
+       code_ptr = code;
+       word_count = 0;
+
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+
+       if (label && label->size == 0) {
+               label->addr = (sljit_uw)code;
+               label->size = 0;
+               label = label->next;
+       }
+
+       do {
+               buf_ptr = (sljit_uw*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       word_count++;
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       if (cpool_size > 0) {
+                               if (cpool_skip_alignment > 0) {
+                                       buf_ptr++;
+                                       cpool_skip_alignment--;
+                               }
+                               else {
+                                       if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
+                                               SLJIT_FREE_EXEC(code);
+                                               compiler->error = SLJIT_ERR_ALLOC_FAILED;
+                                               return NULL;
+                                       }
+                                       buf_ptr++;
+                                       if (++cpool_current_index >= cpool_size) {
+                                               SLJIT_ASSERT(!first_patch);
+                                               cpool_size = 0;
+                                               if (label && label->size == word_count) {
+                                                       /* Points after the current instruction. */
+                                                       label->addr = (sljit_uw)code_ptr;
+                                                       label->size = code_ptr - code;
+                                                       label = label->next;
+                                               }
+                                       }
+                               }
+                       }
+                       else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
+#endif
+                               *code_ptr = *buf_ptr++;
+                               /* These structures are ordered by their address. */
+                               SLJIT_ASSERT(!label || label->size >= word_count);
+                               SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                               SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                               if (jump && jump->addr == word_count) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                                       if (detect_jump_type(jump, code_ptr, code))
+                                               code_ptr--;
+                                       jump->addr = (sljit_uw)code_ptr;
+#else
+                                       jump->addr = (sljit_uw)(code_ptr - 2);
+                                       if (detect_jump_type(jump, code_ptr, code))
+                                               code_ptr -= 2;
+#endif
+                                       jump = jump->next;
+                               }
+                               if (label && label->size == word_count) {
+                                       /* code_ptr can be affected above. */
+                                       label->addr = (sljit_uw)(code_ptr + 1);
+                                       label->size = (code_ptr + 1) - code;
+                                       label = label->next;
+                               }
+                               if (const_ && const_->addr == word_count) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                                       const_->addr = (sljit_uw)code_ptr;
+#else
+                                       const_->addr = (sljit_uw)(code_ptr - 1);
+#endif
+                                       const_ = const_->next;
+                               }
+                               code_ptr++;
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       }
+                       else {
+                               /* Fortunately, no need to shift. */
+                               cpool_size = *buf_ptr++ & ~PUSH_POOL;
+                               SLJIT_ASSERT(cpool_size > 0);
+                               cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
+                               cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
+                               if (cpool_current_index > 0) {
+                                       /* Unconditional branch. */
+                                       *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
+                                       code_ptr = cpool_start_address + cpool_current_index;
+                               }
+                               cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
+                               cpool_current_index = 0;
+                               last_pc_patch = code_ptr;
+                       }
+#endif
+               } while (buf_ptr < buf_end);
+               buf = buf->next;
+       } while (buf);
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       SLJIT_ASSERT(cpool_size == 0);
+       if (compiler->cpool_fill > 0) {
+               cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
+               cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
+               if (cpool_current_index > 0)
+                       code_ptr = cpool_start_address + cpool_current_index;
+
+               buf_ptr = compiler->cpool;
+               buf_end = buf_ptr + compiler->cpool_fill;
+               cpool_current_index = 0;
+               while (buf_ptr < buf_end) {
+                       if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
+                               SLJIT_FREE_EXEC(code);
+                               compiler->error = SLJIT_ERR_ALLOC_FAILED;
+                               return NULL;
+                       }
+                       buf_ptr++;
+                       cpool_current_index++;
+               }
+               SLJIT_ASSERT(!first_patch);
+       }
+#endif
+
+       jump = compiler->jumps;
+       while (jump) {
+               buf_ptr = (sljit_uw*)jump->addr;
+
+               if (jump->flags & PATCH_B) {
+                       if (!(jump->flags & JUMP_ADDR)) {
+                               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+                               SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
+                               *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+                       }
+                       else {
+                               SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
+                               *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+                       }
+               }
+               else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       jump->addr = (sljit_uw)code_ptr;
+                       code_ptr[0] = (sljit_uw)buf_ptr;
+                       code_ptr[1] = *buf_ptr;
+                       inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+                       code_ptr += 2;
+#else
+                       inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+#endif
+               }
+               else {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       if (jump->flags & IS_BL)
+                               buf_ptr--;
+                       if (*buf_ptr & (1 << 23))
+                               buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
+                       else
+                               buf_ptr += 1;
+                       *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+#else
+                       inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+#endif
+               }
+               jump = jump->next;
+       }
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       const_ = compiler->consts;
+       while (const_) {
+               buf_ptr = (sljit_uw*)const_->addr;
+               const_->addr = (sljit_uw)code_ptr;
+
+               code_ptr[0] = (sljit_uw)buf_ptr;
+               code_ptr[1] = *buf_ptr;
+               if (*buf_ptr & (1 << 23))
+                       buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
+               else
+                       buf_ptr += 1;
+               /* Set the value again (can be a simple constant). */
+               inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0);
+               code_ptr += 2;
+
+               const_ = const_->next;
+       }
+#endif
+
+       SLJIT_ASSERT(code_ptr - code <= (sljit_si)size);
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       return code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* emit_op inp_flags.
+   WRITE_BACK must be the first, since it is a flag. */
+#define WRITE_BACK     0x01
+#define ALLOW_IMM      0x02
+#define ALLOW_INV_IMM  0x04
+#define ALLOW_ANY_IMM  (ALLOW_IMM | ALLOW_INV_IMM)
+#define ARG_TEST       0x08
+
+/* Creates an index in data_transfer_insts array. */
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x10
+#define HALF_DATA      0x20
+#define SIGNED_DATA    0x40
+#define LOAD_DATA      0x80
+
+/* Condition: AL. */
+#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
+       (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
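+
+/* For instance, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, src1, RM(src2))
+   expands to 0xe0800000 | RD(dst) | RN(src1) | RM(src2), i.e. an
+   always-executed "add dst, src1, src2". */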
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si size, i, tmp;
+       sljit_uw push;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       /* Push saved registers, temporary registers
+          stmdb sp!, {..., lr} */
+       push = PUSH | (1 << 14);
+
+       tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--)
+               push |= 1 << reg_map[i];
+
+       for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+               push |= 1 << reg_map[i];
+
+       FAIL_IF(push_inst(compiler, push));
+
+       /* Stack must be aligned to 8 bytes: */
+       size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+       local_size = ((size + local_size + 7) & ~7) - size;
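+       /* For example (illustrative): with size == 20 and local_size == 30 the
+          expression yields ((20 + 30 + 7) & ~7) - 20 == 36, and 20 + 36 == 56
+          keeps the stack a multiple of 8 bytes. */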
+       compiler->local_size = local_size;
+       if (local_size > 0)
+               FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
+
+       if (args >= 1)
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0))));
+       if (args >= 2)
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1))));
+       if (args >= 3)
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2))));
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si size;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+       compiler->local_size = ((size + local_size + 7) & ~7) - size;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si i, tmp;
+       sljit_uw pop;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       if (compiler->local_size > 0)
+               FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
+
+       /* Pop saved registers, temporary registers
+          ldmia sp!, {..., pc} */
+       pop = POP | (1 << 15);
+
+       tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--)
+               pop |= 1 << reg_map[i];
+
+       for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+               pop |= 1 << reg_map[i];
+
+       return push_inst(compiler, pop);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* s/l - store/load (1 bit)
+   u/s - unsigned/signed (1 bit)
+   w/b/h/N - word/byte/half/NOT allowed (2 bits)
+   It contains 16 entries, but not all are distinct. */
+
+static sljit_sw data_transfer_insts[16] = {
+/* s u w */ 0xe5000000 /* str */,
+/* s u b */ 0xe5400000 /* strb */,
+/* s u h */ 0xe10000b0 /* strh */,
+/* s u N */ 0x00000000 /* not allowed */,
+/* s s w */ 0xe5000000 /* str */,
+/* s s b */ 0xe5400000 /* strb */,
+/* s s h */ 0xe10000b0 /* strh */,
+/* s s N */ 0x00000000 /* not allowed */,
+
+/* l u w */ 0xe5100000 /* ldr */,
+/* l u b */ 0xe5500000 /* ldrb */,
+/* l u h */ 0xe11000b0 /* ldrh */,
+/* l u N */ 0x00000000 /* not allowed */,
+/* l s w */ 0xe5100000 /* ldr */,
+/* l s b */ 0xe11000d0 /* ldrsb */,
+/* l s h */ 0xe11000f0 /* ldrsh */,
+/* l s N */ 0x00000000 /* not allowed */,
+};
+
+#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
+       (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
+/* Normal ldr/str instruction.
+   Type2: ldrsb, ldrh, ldrsh */
+#define IS_TYPE1_TRANSFER(type) \
+       (data_transfer_insts[(type) >> 4] & 0x04000000)
+#define TYPE2_TRANSFER_IMM(imm) \
+       (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
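+
+/* For example, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, base, 4)
+   selects entry 8 of the table above (0xe5100000, ldr) and encodes
+   "ldr reg, [base, #4]" with the add-offset bit set. */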
+
+/* flags: */
+  /* Arguments are swapped. */
+#define ARGS_SWAPPED   0x01
+  /* Inverted immediate. */
+#define INV_IMM                0x02
+  /* Source and destination are registers. */
+#define REG_DEST       0x04
+#define REG_SOURCE     0x08
+  /* One instruction is enough. */
+#define FAST_DEST      0x10
+  /* Multiple instructions are required. */
+#define SLOW_DEST      0x20
+/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
+#define SET_FLAGS      (1 << 20)
+/* dst: reg
+   src1: reg
+   src2: reg or imm (if allowed)
+   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
+#define SRC2_IMM       (1 << 25)
+
+#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
+       return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)))
+
+#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
+       return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))
+
+#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
+       SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
+       if (compiler->shift_imm != 0x20) { \
+               SLJIT_ASSERT(src1 == TMP_REG1); \
+               SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
+               if (compiler->shift_imm != 0) \
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \
+       } \
+       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1])));
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_si src2)
+{
+       sljit_sw mul_inst;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+               if (dst != src2) {
+                       if (src2 & SRC2_IMM) {
+                               if (flags & INV_IMM)
+                                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+                       }
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+               if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       if (op == SLJIT_MOV_UB)
+                               return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
+#else
+                       return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
+#endif
+               }
+               else if (dst != src2) {
+                       SLJIT_ASSERT(src2 & SRC2_IMM);
+                       if (flags & INV_IMM)
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+               if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
+#else
+                       return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
+#endif
+               }
+               else if (dst != src2) {
+                       SLJIT_ASSERT(src2 & SRC2_IMM);
+                       if (flags & INV_IMM)
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               if (src2 & SRC2_IMM) {
+                       if (flags & INV_IMM)
+                               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+               }
+               EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               SLJIT_ASSERT(!(src2 & SRC2_IMM));
+               FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
+               if (flags & SET_FLAGS)
+                       EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
+
+       case SLJIT_ADDC:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);
+
+       case SLJIT_SUB:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               if (!(flags & ARGS_SWAPPED))
+                       EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
+               EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);
+
+       case SLJIT_SUBC:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               if (!(flags & ARGS_SWAPPED))
+                       EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
+               EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);
+
+       case SLJIT_MUL:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               SLJIT_ASSERT(!(src2 & SRC2_IMM));
+               if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
+                       mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
+               else
+                       mul_inst = MUL | (reg_map[dst] << 16);
+
+               if (dst != src2)
+                       FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
+               else if (dst != src1)
+                       FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
+               else {
+                       /* Rm and Rd must not be the same register. */
+                       SLJIT_ASSERT(dst != TMP_REG1);
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
+                       FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
+               }
+
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+
+               /* We need to use TMP_REG3. */
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+               /* cmp TMP_REG3, dst asr #31. */
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0));
+
+       case SLJIT_AND:
+               if (!(flags & INV_IMM))
+                       EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP);
+               EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP);
+
+       case SLJIT_OR:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP);
+
+       case SLJIT_XOR:
+               SLJIT_ASSERT(!(flags & INV_IMM));
+               EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP);
+
+       case SLJIT_SHL:
+               EMIT_SHIFT_INS_AND_RETURN(0);
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT_INS_AND_RETURN(1);
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT_INS_AND_RETURN(2);
+       }
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+#undef EMIT_DATA_PROCESS_INS_AND_RETURN
+#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN
+#undef EMIT_SHIFT_INS_AND_RETURN
+
+/* Tests whether the immediate can be encoded in the 12 bit imm field.
+   Returns 0 if that is not possible. */
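+/* The encoding is an 8-bit value rotated right by 2 * rol: 0xff000000 is
+   representable (0xff with a rotation field of 4), while 0x101 is not,
+   because its set bits never fit into eight circularly contiguous bits. */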
+static sljit_uw get_imm(sljit_uw imm)
+{
+       sljit_si rol;
+
+       if (imm <= 0xff)
+               return SRC2_IMM | imm;
+
+       if (!(imm & 0xff000000)) {
+               imm <<= 8;
+               rol = 8;
+       }
+       else {
+               imm = (imm << 24) | (imm >> 8);
+               rol = 0;
+       }
+
+       if (!(imm & 0xff000000)) {
+               imm <<= 8;
+               rol += 4;
+       }
+
+       if (!(imm & 0xf0000000)) {
+               imm <<= 4;
+               rol += 2;
+       }
+
+       if (!(imm & 0xc0000000)) {
+               imm <<= 2;
+               rol += 1;
+       }
+
+       if (!(imm & 0x00ffffff))
+               return SRC2_IMM | (imm >> 24) | (rol << 8);
+       else
+               return 0;
+}
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
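+/* ARMv5 has no movw/movt, so the function below tries to build a 32-bit
+   constant from two rotated immediates: MOV+ORR for the value itself, or
+   MVN+BIC for its complement. For example, 0x00ff00ff becomes
+   MOV 0x00ff0000 followed by ORR 0x000000ff. */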
+static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm, sljit_si positive)
+{
+       sljit_uw mask;
+       sljit_uw imm1;
+       sljit_uw imm2;
+       sljit_si rol;
+
+       /* Step 1: Search for a zero byte (8 contiguous zero bits). */
+       mask = 0xff000000;
+       rol = 8;
+       while(1) {
+               if (!(imm & mask)) {
+                       /* Rol imm by rol. */
+                       imm = (imm << rol) | (imm >> (32 - rol));
+                       /* Calculate arm rol. */
+                       rol = 4 + (rol >> 1);
+                       break;
+               }
+               rol += 2;
+               mask >>= 2;
+               if (mask & 0x3) {
+                       /* rol by 8. */
+                       imm = (imm << 8) | (imm >> 24);
+                       mask = 0xff00;
+                       rol = 24;
+                       while (1) {
+                               if (!(imm & mask)) {
+                                       /* Rol imm by rol. */
+                                       imm = (imm << rol) | (imm >> (32 - rol));
+                                       /* Calculate arm rol. */
+                                       rol = (rol >> 1) - 8;
+                                       break;
+                               }
+                               rol += 2;
+                               mask >>= 2;
+                               if (mask & 0x3)
+                                       return 0;
+                       }
+                       break;
+               }
+       }
+
+       /* The low 8 bit must be zero. */
+       SLJIT_ASSERT(!(imm & 0xff));
+
+       if (!(imm & 0xff000000)) {
+               imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
+               imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
+       }
+       else if (imm & 0xc0000000) {
+               imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
+               imm <<= 8;
+               rol += 4;
+
+               if (!(imm & 0xff000000)) {
+                       imm <<= 8;
+                       rol += 4;
+               }
+
+               if (!(imm & 0xf0000000)) {
+                       imm <<= 4;
+                       rol += 2;
+               }
+
+               if (!(imm & 0xc0000000)) {
+                       imm <<= 2;
+                       rol += 1;
+               }
+
+               if (!(imm & 0x00ffffff))
+                       imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
+               else
+                       return 0;
+       }
+       else {
+               if (!(imm & 0xf0000000)) {
+                       imm <<= 4;
+                       rol += 2;
+               }
+
+               if (!(imm & 0xc0000000)) {
+                       imm <<= 2;
+                       rol += 1;
+               }
+
+               imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
+               imm <<= 8;
+               rol += 4;
+
+               if (!(imm & 0xf0000000)) {
+                       imm <<= 4;
+                       rol += 2;
+               }
+
+               if (!(imm & 0xc0000000)) {
+                       imm <<= 2;
+                       rol += 1;
+               }
+
+               if (!(imm & 0x00ffffff))
+                       imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
+               else
+                       return 0;
+       }
+
+       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
+       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
+       return 1;
+}
+#endif
+
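+/* Immediate loading strategy: a 16-bit movw on ARMv7, then a single rotated
+   immediate (mov or mvn), then the two-instruction forms above on ARMv5,
+   and as a last resort a literal pool load (ARMv5) or emit_imm (ARMv7). */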
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm)
+{
+       sljit_uw tmp;
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+       if (!(imm & ~0xffff))
+               return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
+#endif
+
+       /* Create the immediate with a single instruction. */
+       tmp = get_imm(imm);
+       if (tmp)
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
+
+       tmp = get_imm(~imm);
+       if (tmp)
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       /* Create the immediate with two instructions. */
+       FAIL_IF(generate_int(compiler, reg, imm, 1));
+       FAIL_IF(generate_int(compiler, reg, ~imm, 0));
+
+       /* Load integer. */
+       return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
+#else
+       return emit_imm(compiler, reg, imm);
+#endif
+}
+
+/* Helper function. Sets dst to reg + value using at most one instruction; the flags are not set. */
+static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
+{
+       if (value >= 0) {
+               value = get_imm(value);
+               if (value)
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value));
+       }
+       else {
+               value = get_imm(-value);
+               if (value)
+                       return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value));
+       }
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
+/* Tries to perform the memory access or immediate load using at most one instruction. */
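+/* Returns -1 when the access has been emitted, 1 when ARG_TEST is set and a
+   single instruction would suffice, and 0 when one instruction is not
+   enough (the getput_arg slow path is required). */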
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_uw imm;
+
+       if (arg & SLJIT_IMM) {
+               imm = get_imm(argw);
+               if (imm) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
+                       return -1;
+               }
+               imm = get_imm(~argw);
+               if (imm) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
+                       return -1;
+               }
+               return 0;
+       }
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* Fast loads/stores. */
+       if (!(arg & REG_MASK))
+               return 0;
+
+       if (arg & OFFS_REG_MASK) {
+               if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags))
+                       return 0;
+
+               if (inp_flags & ARG_TEST)
+                       return 1;
+               FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
+                       RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
+               return -1;
+       }
+
+       if (IS_TYPE1_TRANSFER(inp_flags)) {
+               if (argw >= 0 && argw <= 0xfff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
+                       return -1;
+               }
+               if (argw < 0 && argw >= -0xfff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
+                       return -1;
+               }
+       }
+       else {
+               if (argw >= 0 && argw <= 0xff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
+                       return -1;
+               }
+               if (argw < 0 && argw >= -0xff) {
+                       if (inp_flags & ARG_TEST)
+                               return 1;
+                       argw = -argw;
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write-back. */
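+/* Returns nonzero when loading TMP_REG3 with a base address is worthwhile,
+   because the following access (next_arg/next_argw) could reuse it. */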
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       /* Immediate caching is not supported as it would be an operation on constant arguments. */
+       if (arg & SLJIT_IMM)
+               return 0;
+
+       /* Always a simple operation. */
+       if (arg & OFFS_REG_MASK)
+               return 0;
+
+       if (!(arg & REG_MASK)) {
+               /* Immediate access. */
+               if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
+                       return 1;
+               return 0;
+       }
+
+       if (argw <= 0xfffff && argw >= -0xfffff)
+               return 0;
+
+       if (argw == next_argw && (next_arg & SLJIT_MEM))
+               return 1;
+
+       if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
+               return 1;
+
+       return 0;
+}
+
+#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \
+       if (max_delta & 0xf00) \
+               FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \
+       else \
+               FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm))));
+
+#define TEST_WRITE_BACK() \
+       if (inp_flags & WRITE_BACK) { \
+               tmp_r = arg & REG_MASK; \
+               if (reg == tmp_r) { \
+                       /* This can only happen for stores */ \
+                       /* since ldr reg, [reg, ...]! has no meaning */ \
+                       SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
+                       reg = TMP_REG3; \
+               } \
+       }
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_r;
+       sljit_sw max_delta;
+       sljit_sw sign;
+       sljit_uw imm;
+
+       if (arg & SLJIT_IMM) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               return load_immediate(compiler, reg, argw);
+       }
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
+       max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff;
+
+       if ((arg & REG_MASK) == SLJIT_UNUSED) {
+               /* Write back is not used. */
+               imm = (sljit_uw)(argw - compiler->cache_argw);
+               if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
+                       if (imm <= (sljit_uw)max_delta) {
+                               sign = 1;
+                               argw = argw - compiler->cache_argw;
+                       }
+                       else {
+                               sign = 0;
+                               argw = compiler->cache_argw - argw;
+                       }
+
+                       GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw);
+                       return SLJIT_SUCCESS;
+               }
+
+               /* With write back, we can create some sophisticated loads, but
+                  it is hard to decide whether we should convert downward (0s) or upward (1s). */
+               imm = (sljit_uw)(argw - next_argw);
+               if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
+                       SLJIT_ASSERT(inp_flags & LOAD_DATA);
+
+                       compiler->cache_arg = SLJIT_IMM;
+                       compiler->cache_argw = argw;
+                       tmp_r = TMP_REG3;
+               }
+
+               FAIL_IF(load_immediate(compiler, tmp_r, argw));
+               GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0);
+               return SLJIT_SUCCESS;
+       }
+
+       if (arg & OFFS_REG_MASK) {
+               SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
+               if (inp_flags & WRITE_BACK)
+                       tmp_r = arg & REG_MASK;
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+               return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
+       }
+
+       imm = (sljit_uw)(argw - compiler->cache_argw);
+       if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) {
+               SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
+               GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm);
+               return SLJIT_SUCCESS;
+       }
+       if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) {
+               SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
+               imm = (sljit_uw)-(sljit_sw)imm;
+               GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm);
+               return SLJIT_SUCCESS;
+       }
+
+       imm = get_imm(argw & ~max_delta);
+       if (imm) {
+               TEST_WRITE_BACK();
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
+               GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
+               return SLJIT_SUCCESS;
+       }
+
+       imm = get_imm(-argw & ~max_delta);
+       if (imm) {
+               argw = -argw;
+               TEST_WRITE_BACK();
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
+               GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
+               return SLJIT_SUCCESS;
+       }
+
+       if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
+               TEST_WRITE_BACK();
+               return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
+       }
+
+       if (argw == next_argw && (next_arg & SLJIT_MEM)) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+
+               TEST_WRITE_BACK();
+               return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
+       }
+
+       imm = (sljit_uw)(argw - next_argw);
+       if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));
+
+               compiler->cache_arg = arg;
+               compiler->cache_argw = argw;
+
+               GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0);
+               return SLJIT_SUCCESS;
+       }
+
+       if ((arg & REG_MASK) == tmp_r) {
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+               tmp_r = TMP_REG3;
+       }
+
+       FAIL_IF(load_immediate(compiler, tmp_r, argw));
+       return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg, argw))
+               return compiler->error;
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return compiler->error;
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* arg1 goes to TMP_REG1 or a source register.
+          arg2 goes to TMP_REG2, an immediate, or a source register.
+          TMP_REG3 can be used for caching.
+          The result goes to TMP_REG2, so storing the result can use TMP_REG1 and TMP_REG3. */
+
+       /* We prefer registers and simple constants. */
+       sljit_si dst_r;
+       sljit_si src1_r;
+       sljit_si src2_r = 0;
+       sljit_si sugg_src2_r = TMP_REG2;
+       sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       /* Destination check. */
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+                       return SLJIT_SUCCESS;
+               dst_r = TMP_REG2;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r;
+       }
+       else {
+               SLJIT_ASSERT(dst & SLJIT_MEM);
+               if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
+                       flags |= FAST_DEST;
+                       dst_r = TMP_REG2;
+               }
+               else {
+                       flags |= SLOW_DEST;
+                       dst_r = 0;
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1))
+               src1_r = src1;
+       else if (FAST_IS_REG(src2)) {
+               flags |= ARGS_SWAPPED;
+               src1_r = src2;
+               src2 = src1;
+               src2w = src1w;
+       }
+       else do { /* do { } while(0) is used because of breaks. */
+               src1_r = 0;
+               if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
+                       /* The second check will generate a hit. */
+                       src2_r = get_imm(src1w);
+                       if (src2_r) {
+                               flags |= ARGS_SWAPPED;
+                               src1 = src2;
+                               src1w = src2w;
+                               break;
+                       }
+                       if (inp_flags & ALLOW_INV_IMM) {
+                               src2_r = get_imm(~src1w);
+                               if (src2_r) {
+                                       flags |= ARGS_SWAPPED | INV_IMM;
+                                       src1 = src2;
+                                       src1w = src2w;
+                                       break;
+                               }
+                       }
+                       if (GET_OPCODE(op) == SLJIT_ADD) {
+                               src2_r = get_imm(-src1w);
+                               if (src2_r) {
+                                       /* Note: ARGS_SWAPPED is intentionally not applied! */
+                                       src1 = src2;
+                                       src1w = src2w;
+                                       op = SLJIT_SUB | GET_ALL_FLAGS(op);
+                                       break;
+                               }
+                       }
+               }
+
+               if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
+                       FAIL_IF(compiler->error);
+                       src1_r = TMP_REG1;
+               }
+       } while (0);
+
+       /* Source 2. */
+       if (src2_r == 0) {
+               if (FAST_IS_REG(src2)) {
+                       src2_r = src2;
+                       flags |= REG_SOURCE;
+                       if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                               dst_r = src2_r;
+               }
+               else do { /* do { } while(0) is used because of breaks. */
+                       if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) {
+                               src2_r = get_imm(src2w);
+                               if (src2_r)
+                                       break;
+                               if (inp_flags & ALLOW_INV_IMM) {
+                                       src2_r = get_imm(~src2w);
+                                       if (src2_r) {
+                                               flags |= INV_IMM;
+                                               break;
+                                       }
+                               }
+                               if (GET_OPCODE(op) == SLJIT_ADD) {
+                                       src2_r = get_imm(-src2w);
+                                       if (src2_r) {
+                                               op = SLJIT_SUB | GET_ALL_FLAGS(op);
+                                               flags &= ~ARGS_SWAPPED;
+                                               break;
+                                       }
+                               }
+                               if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) {
+                                       src2_r = get_imm(-src2w);
+                                       if (src2_r) {
+                                               op = SLJIT_ADD | GET_ALL_FLAGS(op);
+                                               flags &= ~ARGS_SWAPPED;
+                                               break;
+                                       }
+                               }
+                       }
+
+                       /* src2_r is 0. */
+                       if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
+                               FAIL_IF(compiler->error);
+                               src2_r = sugg_src2_r;
+                       }
+               } while (0);
+       }
+
+       /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero.
+          If they are zero, they must not be registers. */
+       if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
+                       flags |= ARGS_SWAPPED;
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+               }
+               src1_r = TMP_REG1;
+               src2_r = TMP_REG2;
+       }
+       else if (src1_r == 0 && src2_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+               src1_r = TMP_REG1;
+       }
+       else if (src1_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               src1_r = TMP_REG1;
+       }
+       else if (src2_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+               src2_r = sugg_src2_r;
+       }
+
+       if (dst_r == 0)
+               dst_r = TMP_REG2;
+
+       if (src1_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
+               src1_r = TMP_REG1;
+       }
+
+       if (src2_r == 0) {
+               FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
+               src2_r = sugg_src2_r;
+       }
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       if (flags & (FAST_DEST | SLOW_DEST)) {
+               if (flags & FAST_DEST)
+                       FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
+               else
+                       FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
+       }
+       return SLJIT_SUCCESS;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
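+/* EABI runtime helpers: both return the quotient in r0 and the remainder in
+   r1. The call clobbers the caller-saved registers, which is why r2 is
+   spilled around it in sljit_emit_op0 below when it is in use. */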
+extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
+extern int __aeabi_idivmod(int numerator, int denominator);
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op0(compiler, op));
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               FAIL_IF(push_inst(compiler, BKPT));
+               break;
+       case SLJIT_NOP:
+               FAIL_IF(push_inst(compiler, NOP));
+               break;
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+               return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_R1] << 16)
+                       | (reg_map[SLJIT_R0] << 12)
+                       | (reg_map[SLJIT_R0] << 8)
+                       | reg_map[SLJIT_R1]);
+#else
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1))));
+               return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_R1] << 16)
+                       | (reg_map[SLJIT_R0] << 12)
+                       | (reg_map[SLJIT_R0] << 8)
+                       | reg_map[TMP_REG1]);
+#endif
+       case SLJIT_LUDIV:
+       case SLJIT_LSDIV:
+               if (compiler->scratches >= 3)
+                       FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
+#if defined(__GNUC__)
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+                       (op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+               if (compiler->scratches >= 3)
+                       return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */);
+               return SLJIT_SUCCESS;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOV_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOV_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOV_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_UI:
+       case SLJIT_MOVU_SI:
+       case SLJIT_MOVU_P:
+               return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOVU_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOVU_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOVU_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_NOT:
+               return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+                       || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+               compiler->skip_checks = 1;
+#endif
+               return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
+
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+       case SLJIT_ADDC:
+       case SLJIT_SUB:
+       case SLJIT_SUBC:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_MUL:
+               return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+               return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+       case SLJIT_ASHR:
+               if (src2 & SLJIT_IMM) {
+                       compiler->shift_imm = src2w & 0x1f;
+                       return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
+               }
+               else {
+                       compiler->shift_imm = 0x20;
+                       return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
+               }
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg << 1;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+       return push_inst(compiler, *(sljit_uw*)instruction);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+
+/* 0 - no fpu
+   1 - vfp */
+static sljit_si arm_fpu_type = -1;
+
+static void init_compiler(void)
+{
+       if (arm_fpu_type != -1)
+               return;
+
+       /* TODO: Only the OS can help to determine the correct fpu type. */
+       arm_fpu_type = 1;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#else
+       if (arm_fpu_type == -1)
+               init_compiler();
+       return arm_fpu_type;
+#endif
+}
+
+#else
+
+#define arm_fpu_type 1
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+       /* Always available. */
+       return 1;
+}
+
+#endif
+
+#define FPU_LOAD (1 << 20)
+#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
+       ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs))
+#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
+       ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16))
+
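+/* VFP loads/stores take an 8-bit word-count offset, so only byte offsets in
+   [-1020, 1020] that are multiples of four (mask 0x3fc) fit directly; any
+   other address is computed into a register first. */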
+static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_sw tmp;
+       sljit_uw imm;
+       sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD));
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+               arg = SLJIT_MEM | TMP_REG1;
+               argw = 0;
+       }
+
+       /* Fast loads and stores. */
+       if ((arg & REG_MASK)) {
+               if (!(argw & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
+               if (!(-argw & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
+       }
+
+       if (compiler->cache_arg == arg) {
+               tmp = argw - compiler->cache_argw;
+               if (!(tmp & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2));
+               if (!(-tmp & ~0x3fc))
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
+               }
+       }
+
+       if (arg & REG_MASK) {
+               if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
+               }
+               imm = get_imm(argw & ~0x3fc);
+               if (imm) {
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+               }
+               imm = get_imm(-argw & ~0x3fc);
+               if (imm) {
+                       argw = -argw;
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
+                       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+               }
+       }
+
+       compiler->cache_arg = arg;
+       compiler->cache_argw = argw;
+       if (arg & REG_MASK) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
+       }
+       else
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+       return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
+               src = TMP_FREG1;
+       }
+
+       FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0)));
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
+
+       /* Store the integer value from a VFP register. */
+       return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
+       else if (src & SLJIT_MEM) {
+               /* Load the integer value into a VFP register. */
+               FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
+       }
+
+       FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0)));
+
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
+               src1 = TMP_FREG1;
+       }
+
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
+               src2 = TMP_FREG2;
+       }
+
+       FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0)));
+       return push_inst(compiler, VMRS);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
+               op ^= SLJIT_SINGLE_OP;
+
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw));
+               src = dst_r;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DMOV:
+               if (src != dst_r) {
+                       if (dst_r != TMP_FREG1)
+                               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+                       else
+                               dst_r = src;
+               }
+               break;
+       case SLJIT_DNEG:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+               break;
+       case SLJIT_DABS:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+               break;
+       case SLJIT_CONVD_FROMS:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+               op ^= SLJIT_SINGLE_OP;
+               break;
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       op ^= SLJIT_SINGLE_OP;
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
+               src2 = TMP_FREG2;
+       }
+
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
+               src1 = TMP_FREG1;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DADD:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
+               break;
+
+       case SLJIT_DSUB:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
+               break;
+
+       case SLJIT_DMUL:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
+               break;
+
+       case SLJIT_DDIV:
+               FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
+               break;
+       }
+
+       if (dst_r == TMP_FREG1)
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef FPU_LOAD
+#undef EMIT_FPU_DATA_TRANSFER
+#undef EMIT_FPU_OPERATION
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));
+
+       /* Memory. */
+       if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
+               return compiler->error;
+       /* TMP_REG3 is used for caching. */
+       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
+       else if (src & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
+                       FAIL_IF(compiler->error);
+               else {
+                       compiler->cache_arg = 0;
+                       compiler->cache_argw = 0;
+                       FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
+                       FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
+               }
+       }
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
+       return push_inst(compiler, BLX | RM(TMP_REG3));
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
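+/* Maps an sljit comparison type to an ARM condition code in bits 31..28:
+   0x0 EQ, 0x1 NE, 0x2 CS, 0x3 CC, 0x6 VS, 0x7 VC, 0x8 HI, 0x9 LS,
+   0xa GE, 0xb LT, 0xc GT, 0xd LE and 0xe AL. */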
+static sljit_uw get_cc(sljit_si type)
+{
+       switch (type) {
+       case SLJIT_EQUAL:
+       case SLJIT_MUL_NOT_OVERFLOW:
+       case SLJIT_D_EQUAL:
+               return 0x00000000;
+
+       case SLJIT_NOT_EQUAL:
+       case SLJIT_MUL_OVERFLOW:
+       case SLJIT_D_NOT_EQUAL:
+               return 0x10000000;
+
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
+               return 0x30000000;
+
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
+               return 0x20000000;
+
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
+               return 0x80000000;
+
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               return 0x90000000;
+
+       case SLJIT_SIG_LESS:
+               return 0xb0000000;
+
+       case SLJIT_SIG_GREATER_EQUAL:
+               return 0xa0000000;
+
+       case SLJIT_SIG_GREATER:
+               return 0xc0000000;
+
+       case SLJIT_SIG_LESS_EQUAL:
+               return 0xd0000000;
+
+       case SLJIT_OVERFLOW:
+       case SLJIT_D_UNORDERED:
+               return 0x60000000;
+
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_D_ORDERED:
+               return 0x70000000;
+
+       default:
+               SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
+               return 0xe0000000;
+       }
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_label(compiler));
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       return label;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       /* In ARM, we don't need to touch the arguments. */
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
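+       /* Emit a PC-relative load from the constant pool; the literal is
+          patched with the target address later. Loading straight into PC
+          performs the jump itself, while calls load TMP_REG1 and use BLX. */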
+       if (type >= SLJIT_FAST_CALL)
+               PTR_FAIL_IF(prepare_blx(compiler));
+       PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
+               type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP) {
+               jump->addr = compiler->size;
+               compiler->patches++;
+       }
+
+       if (type >= SLJIT_FAST_CALL) {
+               jump->flags |= IS_BL;
+               PTR_FAIL_IF(emit_blx(compiler));
+       }
+
+       if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
+               jump->addr = compiler->size;
+#else
+       if (type >= SLJIT_FAST_CALL)
+               jump->flags |= IS_BL;
+       PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
+       PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
+       jump->addr = compiler->size;
+#endif
+       return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       /* In ARM, we don't need to touch the arguments. */
+       if (!(src & SLJIT_IMM)) {
+               if (FAST_IS_REG(src))
+                       return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
+
+               SLJIT_ASSERT(src & SLJIT_MEM);
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
+               return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       FAIL_IF(!jump);
+       set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
+       jump->u.target = srcw;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       if (type >= SLJIT_FAST_CALL)
+               FAIL_IF(prepare_blx(compiler));
+       FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
+       if (type >= SLJIT_FAST_CALL)
+               FAIL_IF(emit_blx(compiler));
+#else
+       FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
+       FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
+#endif
+       jump->addr = compiler->size;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si dst_r, flags = GET_ALL_FLAGS(op);
+       sljit_uw cc, ins;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       cc = get_cc(type & 0xff);
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+
+       if (op < SLJIT_ADD) {
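+               /* Materialize 0 unconditionally, then overwrite it with 1 via
+                  a conditional MOV when cc holds; no branch is needed. */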
+               FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
+               FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
+               return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+       }
+
+       ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
+       if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
+               FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
+               /* The condition must always be set, even if the ORR/EOR is not executed above. */
+               return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
+       }
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       } else if (src & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
+       FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
+       if (dst_r == TMP_REG2)
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));
+
+       return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+       PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value));
+       compiler->patches++;
+#else
+       PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
+#endif
+       set_const(const_, compiler);
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+       return const_;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       inline_set_jump_addr(addr, new_addr, 1);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       inline_set_const(addr, new_constant, 1);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c
new file mode 100644 (file)
index 0000000..b66455f
--- /dev/null
@@ -0,0 +1,2028 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "ARM-64" SLJIT_CPUINFO;
+}
+
+/* Length of an instruction word */
+typedef sljit_ui sljit_ins;
+
+#define TMP_ZERO       (0)
+
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_LR         (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_SP         (SLJIT_NUMBER_OF_REGISTERS + 6)
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
+  31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31
+};
+
+#define W_OP (1 << 31)
+#define RD(rd) (reg_map[rd])
+#define RT(rt) (reg_map[rt])
+#define RN(rn) (reg_map[rn] << 5)
+#define RT2(rt2) (reg_map[rt2] << 10)
+#define RM(rm) (reg_map[rm] << 16)
+#define VD(vd) (vd)
+#define VT(vt) (vt)
+#define VN(vn) ((vn) << 5)
+#define VM(vm) ((vm) << 16)
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+#define ADC 0x9a000000
+#define ADD 0x8b000000
+#define ADDI 0x91000000
+#define AND 0x8a000000
+#define ANDI 0x92000000
+#define ASRV 0x9ac02800
+#define B 0x14000000
+#define B_CC 0x54000000
+#define BL 0x94000000
+#define BLR 0xd63f0000
+#define BR 0xd61f0000
+#define BRK 0xd4200000
+#define CBZ 0xb4000000
+#define CLZ 0xdac01000
+#define CSINC 0x9a800400
+#define EOR 0xca000000
+#define EORI 0xd2000000
+#define FABS 0x1e60c000
+#define FADD 0x1e602800
+#define FCMP 0x1e602000
+#define FCVT 0x1e224000
+#define FCVTZS 0x9e780000
+#define FDIV 0x1e601800
+#define FMOV 0x1e604000
+#define FMUL 0x1e600800
+#define FNEG 0x1e614000
+#define FSUB 0x1e603800
+#define LDRI 0xf9400000
+#define LDP 0xa9400000
+#define LDP_PST 0xa8c00000
+#define LSLV 0x9ac02000
+#define LSRV 0x9ac02400
+#define MADD 0x9b000000
+#define MOVK 0xf2800000
+#define MOVN 0x92800000
+#define MOVZ 0xd2800000
+#define NOP 0xd503201f
+#define ORN 0xaa200000
+#define ORR 0xaa000000
+#define ORRI 0xb2000000
+#define RET 0xd65f0000
+#define SBC 0xda000000
+#define SBFM 0x93000000
+#define SCVTF 0x9e620000
+#define SDIV 0x9ac00c00
+#define SMADDL 0x9b200000
+#define SMULH 0x9b403c00
+#define STP 0xa9000000
+#define STP_PRE 0xa9800000
+#define STRI 0xf9000000
+#define STR_FI 0x3d000000
+#define STR_FR 0x3c206800
+#define STUR_FI 0x3c000000
+#define SUB 0xcb000000
+#define SUBI 0xd1000000
+#define SUBS 0xeb000000
+#define UBFM 0xd3000000
+#define UDIV 0x9ac00800
+#define UMULH 0x9bc03c00
+
+/* Emit a single instruction word into the compiler buffer. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_imm64_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
+{
+       FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
+       FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21)));
+       FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21)));
+       return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21));
+}
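+/* Illustrative note (not upstream): loading 0x1122334455667788 with
+   emit_imm64_const produces the four-instruction sequence
+       MOVZ dst, #0x7788                (bits  0-15)
+       MOVK dst, #0x5566, LSL #16       (bits 16-31)
+       MOVK dst, #0x3344, LSL #32       (bits 32-47)
+       MOVK dst, #0x1122, LSL #48       (bits 48-63)
+   since each 16-bit immediate field sits at bit 5 of the encoding. */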
+
+static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
+{
+       sljit_si dst = inst[0] & 0x1f;
+       SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
+       inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5);
+       inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21);
+       inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21);
+       inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
+}
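+/* Illustrative note (not upstream): modify_imm64_const patches an already
+   emitted four-instruction MOVZ/MOVK sequence in place, which is
+   presumably what keeps rewritable jumps and constants cheap: no new code
+   is generated, only the four 16-bit immediate fields are rewritten. */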
+
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP) {
+               jump->flags |= PATCH_ABS64;
+               return 0;
+       }
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+       diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4);
+
+       if (jump->flags & IS_COND) {
+               diff += sizeof(sljit_ins);
+               if (diff <= 0xfffff && diff >= -0x100000) {
+                       code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
+                       jump->addr -= sizeof(sljit_ins);
+                       jump->flags |= PATCH_COND;
+                       return 5;
+               }
+               diff -= sizeof(sljit_ins);
+       }
+
+       if (diff <= 0x7ffffff && diff >= -0x8000000) {
+               jump->flags |= PATCH_B;
+               return 4;
+       }
+
+       if (target_addr <= 0xffffffffl) {
+               if (jump->flags & IS_COND)
+                       code_ptr[-5] -= (2 << 5);
+               code_ptr[-2] = code_ptr[0];
+               return 2;
+       }
+       if (target_addr <= 0xffffffffffffl) {
+               if (jump->flags & IS_COND)
+                       code_ptr[-5] -= (1 << 5);
+               jump->flags |= PATCH_ABS48;
+               code_ptr[-1] = code_ptr[0];
+               return 1;
+       }
+
+       jump->flags |= PATCH_ABS64;
+       return 0;
+}
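+/* Illustrative note (not upstream): the value returned by detect_jump_type
+   is the number of instruction words that become unnecessary once a
+   shorter branch form is chosen; the caller shrinks the output buffer with
+   "code_ptr -= detect_jump_type(...)". Returning 0 keeps the full
+   absolute 64-bit sequence (PATCH_ABS64). */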
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+       sljit_si dst;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_generate_code(compiler));
+       reverse_buf(compiler);
+
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       /* These structures are ordered by their address. */
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       if (label && label->size == word_count) {
+                               label->addr = (sljit_uw)code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+                               jump->addr = (sljit_uw)(code_ptr - 4);
+                               code_ptr -= detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr++;
+                       word_count++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+                       if (jump->flags & PATCH_B) {
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
+                               buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
+                               if (jump->flags & IS_COND)
+                                       buf_ptr[-1] -= (4 << 5);
+                               break;
+                       }
+                       if (jump->flags & PATCH_COND) {
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
+                               buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
+                               break;
+                       }
+
+                       SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl);
+                       SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl);
+
+                       dst = buf_ptr[0] & 0x1f;
+                       buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5);
+                       buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21);
+                       if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
+                               buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21);
+                       if (jump->flags & PATCH_ABS64)
+                               buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21);
+               } while (0);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       return code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Core code generator functions.                                       */
+/* --------------------------------------------------------------------- */
+
+#define COUNT_TRAILING_ZERO(value, result) \
+       result = 0; \
+       if (!(value & 0xffffffff)) { \
+               result += 32; \
+               value >>= 32; \
+       } \
+       if (!(value & 0xffff)) { \
+               result += 16; \
+               value >>= 16; \
+       } \
+       if (!(value & 0xff)) { \
+               result += 8; \
+               value >>= 8; \
+       } \
+       if (!(value & 0xf)) { \
+               result += 4; \
+               value >>= 4; \
+       } \
+       if (!(value & 0x3)) { \
+               result += 2; \
+               value >>= 2; \
+       } \
+       if (!(value & 0x1)) { \
+               result += 1; \
+               value >>= 1; \
+       }
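+/* Illustrative note (not upstream): a quick sanity check of the macro
+   above: for value == ((sljit_uw)1 << 40), the first and third branches
+   are taken, adding 32 and 8, so result ends up as 40 and value as 1. */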
+
+#define LOGICAL_IMM_CHECK 0x100
+
+static sljit_ins logical_imm(sljit_sw imm, sljit_si len)
+{
+       sljit_si negated, ones, right;
+       sljit_uw mask, uimm;
+       sljit_ins ins;
+
+       if (len & LOGICAL_IMM_CHECK) {
+               len &= ~LOGICAL_IMM_CHECK;
+               if (len == 32 && (imm == 0 || imm == -1))
+                       return 0;
+               if (len == 16 && ((sljit_si)imm == 0 || (sljit_si)imm == -1))
+                       return 0;
+       }
+
+       SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
+               || (len == 16 && (sljit_si)imm != 0 && (sljit_si)imm != -1));
+       uimm = (sljit_uw)imm;
+       while (1) {
+               if (len <= 0) {
+                       SLJIT_ASSERT_STOP();
+                       return 0;
+               }
+               mask = ((sljit_uw)1 << len) - 1;
+               if ((uimm & mask) != ((uimm >> len) & mask))
+                       break;
+               len >>= 1;
+       }
+
+       len <<= 1;
+
+       negated = 0;
+       if (uimm & 0x1) {
+               negated = 1;
+               uimm = ~uimm;
+       }
+
+       if (len < 64)
+               uimm &= ((sljit_uw)1 << len) - 1;
+
+       /* Unsigned right shift. */
+       COUNT_TRAILING_ZERO(uimm, right);
+
+       /* Signed shift. We also know that the highest bit is set. */
+       imm = (sljit_sw)~uimm;
+       SLJIT_ASSERT(imm < 0);
+
+       COUNT_TRAILING_ZERO(imm, ones);
+
+       if (~imm)
+               return 0;
+
+       if (len == 64)
+               ins = 1 << 22;
+       else
+               ins = (0x3f - ((len << 1) - 1)) << 10;
+
+       if (negated)
+               return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);
+
+       return ins | ((ones - 1) << 10) | ((len - right) << 16);
+}
+
+#undef COUNT_TRAILING_ZERO
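+/* Illustrative sketch (not upstream): an ARM-64 bitmask immediate must be
+   a (possibly rotated) contiguous run of ones replicated across the
+   register, e.g. 0x00ff00ff00ff00ff (16-bit element, eight ones) is
+   encodable, while an arbitrary value such as 0x12345678 is not; in the
+   latter case logical_imm returns 0 and callers fall back to materializing
+   the constant with moves. */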
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw simm)
+{
+       sljit_uw imm = (sljit_uw)simm;
+       sljit_si i, zeros, ones, first;
+       sljit_ins bitmask;
+
+       if (imm <= 0xffff)
+               return push_inst(compiler, MOVZ | RD(dst) | (imm << 5));
+
+       if (simm >= -0x10000 && simm < 0)
+               return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5));
+
+       if (imm <= 0xffffffffl) {
+               if ((imm & 0xffff0000l) == 0xffff0000)
+                       return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5));
+               if ((imm & 0xffff) == 0xffff)
+                       return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+               bitmask = logical_imm(simm, 16);
+               if (bitmask != 0)
+                       return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);
+       }
+       else {
+               bitmask = logical_imm(simm, 32);
+               if (bitmask != 0)
+                       return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
+       }
+
+       if (imm <= 0xffffffffl) {
+               FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
+               return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+       }
+
+       if (simm >= -0x100000000l && simm < 0) {
+               FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)));
+               return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
+       }
+
+       /* A large number of constants can be constructed from ORR and MOVx,
+          but computing them is costly, so we do not try; instead we fall
+          back to the MOVZ/MOVN + MOVK sequences below. */
+
+       zeros = 0;
+       ones = 0;
+       for (i = 4; i > 0; i--) {
+               if ((simm & 0xffff) == 0)
+                       zeros++;
+               if ((simm & 0xffff) == 0xffff)
+                       ones++;
+               simm >>= 16;
+       }
+
+       simm = (sljit_sw)imm;
+       first = 1;
+       if (ones > zeros) {
+               simm = ~simm;
+               for (i = 0; i < 4; i++) {
+                       if (!(simm & 0xffff)) {
+                               simm >>= 16;
+                               continue;
+                       }
+                       if (first) {
+                               first = 0;
+                               FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
+                       }
+                       else
+                               FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21)));
+                       simm >>= 16;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       for (i = 0; i < 4; i++) {
+               if (!(simm & 0xffff)) {
+                       simm >>= 16;
+                       continue;
+               }
+               if (first) {
+                       first = 0;
+                       FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
+               }
+               else
+                       FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
+               simm >>= 16;
+       }
+       return SLJIT_SUCCESS;
+}
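+/* Illustrative trace (not upstream): load_immediate(compiler, dst,
+   0x12345678) matches none of the single-instruction forms above, so it
+   emits MOVZ dst, #0x5678 followed by MOVK dst, #0x1234, LSL #16. A value
+   such as 0xffffffffffff1234 instead hits the MOVN path and needs only a
+   single inverted move. */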
+
+#define ARG1_IMM       0x0010000
+#define ARG2_IMM       0x0020000
+#define INT_OP         0x0040000
+#define SET_FLAGS      0x0080000
+#define UNUSED_RETURN  0x0100000
+#define SLOW_DEST      0x0200000
+#define SLOW_SRC1      0x0400000
+#define SLOW_SRC2      0x0800000
+
+#define CHECK_FLAGS(flag_bits) \
+       if (flags & SET_FLAGS) { \
+               inv_bits |= flag_bits; \
+               if (flags & UNUSED_RETURN) \
+                       dst = TMP_ZERO; \
+       }
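+/* Illustrative note (not upstream): on ARM-64, xor-ing a data-processing
+   opcode with bit 29 (bits 29-30 for the logical group) yields its
+   flag-setting variant (ADD -> ADDS, AND -> ANDS, ...). CHECK_FLAGS merges
+   that pattern into inv_bits, and UNUSED_RETURN redirects the result to
+   the zero register so only the flags survive. */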
+
+static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_sw arg1, sljit_sw arg2)
+{
+       /* dst must be register, TMP_REG1
+          arg1 must be register, TMP_REG1, imm
+          arg2 must be register, TMP_REG2, imm */
+       sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
+       sljit_ins inst_bits;
+       sljit_si op = (flags & 0xffff);
+       sljit_si reg;
+       sljit_sw imm, nimm;
+
+       if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
+               /* Both are immediates. */
+               flags &= ~ARG1_IMM;
+               if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
+                       arg1 = TMP_ZERO;
+               else {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+                       arg1 = TMP_REG1;
+               }
+       }
+
+       if (flags & (ARG1_IMM | ARG2_IMM)) {
+               reg = (flags & ARG2_IMM) ? arg1 : arg2;
+               imm = (flags & ARG2_IMM) ? arg2 : arg1;
+
+               switch (op) {
+               case SLJIT_MUL:
+               case SLJIT_NEG:
+               case SLJIT_CLZ:
+               case SLJIT_ADDC:
+               case SLJIT_SUBC:
+                       /* No form with immediate operand (except imm 0, which
+                       is represented by a ZERO register). */
+                       break;
+               case SLJIT_MOV:
+                       SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
+                       return load_immediate(compiler, dst, imm);
+               case SLJIT_NOT:
+                       SLJIT_ASSERT(flags & ARG2_IMM);
+                       FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
+                       goto set_flags;
+               case SLJIT_SUB:
+                       if (flags & ARG1_IMM)
+                               break;
+                       imm = -imm;
+                       /* Fall through. */
+               case SLJIT_ADD:
+                       if (imm == 0) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
+                       }
+                       if (imm > 0 && imm <= 0xfff) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10));
+                       }
+                       nimm = -imm;
+                       if (nimm > 0 && nimm <= 0xfff) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10));
+                       }
+                       if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22));
+                       }
+                       if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
+                               CHECK_FLAGS(1 << 29);
+                               return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22));
+                       }
+                       if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
+                               FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)));
+                               return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10));
+                       }
+                       if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
+                               FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)));
+                               return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10));
+                       }
+                       break;
+               case SLJIT_AND:
+                       inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
+                       if (!inst_bits)
+                               break;
+                       CHECK_FLAGS(3 << 29);
+                       return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
+               case SLJIT_OR:
+               case SLJIT_XOR:
+                       inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
+                       if (!inst_bits)
+                               break;
+                       if (op == SLJIT_OR)
+                               inst_bits |= ORRI;
+                       else
+                               inst_bits |= EORI;
+                       FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
+                       goto set_flags;
+               case SLJIT_SHL:
+                       if (flags & ARG1_IMM)
+                               break;
+                       if (flags & INT_OP) {
+                               imm &= 0x1f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10)));
+                       }
+                       else {
+                               imm &= 0x3f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10)));
+                       }
+                       goto set_flags;
+               case SLJIT_LSHR:
+               case SLJIT_ASHR:
+                       if (flags & ARG1_IMM)
+                               break;
+                       if (op == SLJIT_ASHR)
+                               inv_bits |= 1 << 30;
+                       if (flags & INT_OP) {
+                               imm &= 0x1f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10)));
+                       }
+                       else {
+                               imm &= 0x3f;
+                               FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10)));
+                       }
+                       goto set_flags;
+               default:
+                       SLJIT_ASSERT_STOP();
+                       break;
+               }
+
+               if (flags & ARG2_IMM) {
+                       if (arg2 == 0)
+                               arg2 = TMP_ZERO;
+                       else {
+                               FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
+                               arg2 = TMP_REG2;
+                       }
+               }
+               else {
+                       if (arg1 == 0)
+                               arg1 = TMP_ZERO;
+                       else {
+                               FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+                               arg1 = TMP_REG1;
+                       }
+               }
+       }
+
+       /* Both arguments are registers. */
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+       case SLJIT_MOV_UB:
+       case SLJIT_MOVU_UB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
+       case SLJIT_MOV_SB:
+       case SLJIT_MOVU_SB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (!(flags & INT_OP))
+                       inv_bits |= 1 << 22;
+               return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
+       case SLJIT_MOV_UH:
+       case SLJIT_MOVU_UH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
+       case SLJIT_MOV_SH:
+       case SLJIT_MOVU_SH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (!(flags & INT_OP))
+                       inv_bits |= 1 << 22;
+               return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
+       case SLJIT_MOV_UI:
+       case SLJIT_MOVU_UI:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if ((flags & INT_OP) && dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+       case SLJIT_MOV_SI:
+       case SLJIT_MOVU_SI:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if ((flags & INT_OP) && dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
+       case SLJIT_NOT:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_NEG:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               if (flags & SET_FLAGS)
+                       inv_bits |= 1 << 29;
+               return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
+               goto set_flags;
+       case SLJIT_ADD:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_ADDC:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_SUB:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_SUBC:
+               CHECK_FLAGS(1 << 29);
+               return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_MUL:
+               if (!(flags & SET_FLAGS))
+                       return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
+               if (flags & INT_OP) {
+                       FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
+                       FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
+                       return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
+               }
+               FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2)));
+               FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
+               return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
+       case SLJIT_AND:
+               CHECK_FLAGS(3 << 29);
+               return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
+       case SLJIT_OR:
+               FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_XOR:
+               FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_SHL:
+               FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_LSHR:
+               FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       case SLJIT_ASHR:
+               FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
+               goto set_flags;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+
+set_flags:
+       if (flags & SET_FLAGS)
+               return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
+       return SLJIT_SUCCESS;
+}
+
+#define STORE          0x01
+#define SIGNED         0x02
+
+#define UPDATE         0x04
+#define ARG_TEST       0x08
+
+#define BYTE_SIZE      0x000
+#define HALF_SIZE      0x100
+#define INT_SIZE       0x200
+#define WORD_SIZE      0x300
+
+#define MEM_SIZE_SHIFT(flags) ((flags) >> 8)
+
+static SLJIT_CONST sljit_ins sljit_mem_imm[4] = {
+/* u l */ 0x39400000 /* ldrb [reg,imm] */,
+/* u s */ 0x39000000 /* strb [reg,imm] */,
+/* s l */ 0x39800000 /* ldrsb [reg,imm] */,
+/* s s */ 0x39000000 /* strb [reg,imm] */,
+};
+
+static SLJIT_CONST sljit_ins sljit_mem_simm[4] = {
+/* u l */ 0x38400000 /* ldurb [reg,imm] */,
+/* u s */ 0x38000000 /* sturb [reg,imm] */,
+/* s l */ 0x38800000 /* ldursb [reg,imm] */,
+/* s s */ 0x38000000 /* sturb [reg,imm] */,
+};
+
+static SLJIT_CONST sljit_ins sljit_mem_pre_simm[4] = {
+/* u l */ 0x38400c00 /* ldrb [reg,imm]! */,
+/* u s */ 0x38000c00 /* strb [reg,imm]! */,
+/* s l */ 0x38800c00 /* ldrsb [reg,imm]! */,
+/* s s */ 0x38000c00 /* strb [reg,imm]! */,
+};
+
+static SLJIT_CONST sljit_ins sljit_mem_reg[4] = {
+/* u l */ 0x38606800 /* ldrb [reg,reg] */,
+/* u s */ 0x38206800 /* strb [reg,reg] */,
+/* s l */ 0x38a06800 /* ldrsb [reg,reg] */,
+/* s s */ 0x38206800 /* strb [reg,reg] */,
+};
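+/* Illustrative note (not upstream): the tables above are indexed by the
+   low two flag bits (bit 0: store, bit 1: signed) and combined with
+   MEM_SIZE_SHIFT(flags) at bit 30; e.g. sljit_mem_imm[STORE] | (3 << 30)
+   is 0xf9000000, the 64-bit STR with an unsigned scaled offset (STRI). */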
+
+/* Helper function. Sets dst to reg + value using at most one instruction;
+   the condition flags are not affected. */
+static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
+{
+       if (value >= 0) {
+               if (value <= 0xfff)
+                       return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10));
+               if (value <= 0xffffff && !(value & 0xfff))
+                       return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
+       }
+       else {
+               value = -value;
+               if (value <= 0xfff)
+                       return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10));
+               if (value <= 0xffffff && !(value & 0xfff))
+                       return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
+       }
+       return SLJIT_ERR_UNSUPPORTED;
+}
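+/* Illustrative note (not upstream): emit_set_delta only succeeds for
+   deltas that fit a 12-bit immediate, optionally shifted left by 12; e.g.
+   +4096 encodes as ADDI dst, reg, #1, LSL #12, whereas +4097 fits neither
+   form and the function returns SLJIT_ERR_UNSUPPORTED so callers can take
+   a slower path. */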
+
+/* Can perform an operation using at most 1 instruction. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_ui shift = MEM_SIZE_SHIFT(flags);
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (SLJIT_UNLIKELY(flags & UPDATE)) {
+               if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) {
+                       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                               return 1;
+
+                       arg &= REG_MASK;
+                       argw &= 0x1ff;
+                       FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3]
+                               | (shift << 30) | RT(reg) | RN(arg) | (argw << 12)));
+                       return -1;
+               }
+               return 0;
+       }
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3;
+               if (argw && argw != shift)
+                       return 0;
+
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+
+               FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg)
+                       | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)));
+               return -1;
+       }
+
+       arg &= REG_MASK;
+       if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) {
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+
+               FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
+                       | RT(reg) | RN(arg) | (argw << (10 - shift))));
+               return -1;
+       }
+
+       if (argw > 255 || argw < -256)
+               return 0;
+
+       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+               return 1;
+
+       FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
+               | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)));
+       return -1;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write-back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw diff;
+       if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
+               return 0;
+
+       if (!(arg & REG_MASK)) {
+               diff = argw - next_argw;
+               if (diff <= 0xfff && diff >= -0xfff)
+                       return 1;
+               return 0;
+       }
+
+       if (argw == next_argw)
+               return 1;
+
+       diff = argw - next_argw;
+       if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
+               return 1;
+
+       return 0;
+}
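+/* Illustrative note (not upstream): a typical cache hit is two consecutive
+   word accesses off the same base, e.g. [base + 0x12340] followed by
+   [base + 0x12348]; argw and next_argw differ by 8, well within the
+   +/- 0xfff window, so the second access can reuse the address computed
+   into TMP_REG3 with at most one adjusting instruction. */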
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg,
+       sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_ui shift = MEM_SIZE_SHIFT(flags);
+       sljit_si tmp_r, other_r;
+       sljit_sw diff;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       tmp_r = (flags & STORE) ? TMP_REG3 : reg;
+
+       if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
+               /* Update only applies if a base register exists. */
+               other_r = OFFS_REG(arg);
+               if (!other_r) {
+                       other_r = arg & REG_MASK;
+                       if (other_r != reg && argw >= 0 && argw <= 0xffffff) {
+                               if ((argw & 0xfff) != 0)
+                                       FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
+                               if (argw >> 12)
+                                       FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
+                               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
+                       }
+                       else if (other_r != reg && argw < 0 && argw >= -0xffffff) {
+                               argw = -argw;
+                               if ((argw & 0xfff) != 0)
+                                       FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
+                               if (argw >> 12)
+                                       FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
+                               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
+                       }
+
+                       if (compiler->cache_arg == SLJIT_MEM) {
+                               if (argw == compiler->cache_argw) {
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                               else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                                       FAIL_IF(compiler->error);
+                                       compiler->cache_argw = argw;
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                       }
+
+                       if (argw) {
+                               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               other_r = TMP_REG3;
+                               argw = 0;
+                       }
+               }
+
+               /* No caching here. */
+               arg &= REG_MASK;
+               argw &= 0x3;
+               if (!argw || argw == shift) {
+                       FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0)));
+                       return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10));
+               }
+               if (arg != reg) {
+                       FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)));
+                       return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
+               }
+               FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(arg) | RM(other_r) | (argw << 10)));
+               FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_LR)));
+               return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_LR));
+       }
+
+       if (arg & OFFS_REG_MASK) {
+               other_r = OFFS_REG(arg);
+               arg &= REG_MASK;
+               FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10)));
+               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r));
+       }
+
+       if (compiler->cache_arg == arg) {
+               diff = argw - compiler->cache_argw;
+               if (diff <= 255 && diff >= -256)
+                       return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
+                               | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
+               }
+       }
+
+       if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
+               FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10)));
+               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
+                       | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
+       }
+
+       diff = argw - next_argw;
+       next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
+       arg &= REG_MASK;
+
+       if (arg && compiler->cache_arg == SLJIT_MEM) {
+               if (compiler->cache_argw == argw)
+                       return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
+               }
+       }
+
+       compiler->cache_argw = argw;
+       if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
+               FAIL_IF(compiler->error);
+               compiler->cache_arg = SLJIT_MEM | arg;
+               arg = 0;
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               compiler->cache_arg = SLJIT_MEM;
+
+               if (next_arg) {
+                       FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg)));
+                       compiler->cache_arg = SLJIT_MEM | arg;
+                       arg = 0;
+               }
+       }
+
+       if (arg)
+               return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
+       return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3));
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg, argw))
+               return compiler->error;
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return compiler->error;
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
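+/* Illustrative note (not upstream): emit_op_mem2 differs from emit_op_mem
+   only in that it forwards the next argument as a caching hint to
+   getput_arg (and does not reset the cache), so a following access to a
+   nearby address can be served from TMP_REG3. */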
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si i, tmp, offs, prev, saved_regs_size;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
+       local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
+       local_size = (local_size + 15) & ~0xf;
+       compiler->local_size = local_size;
+
+       if (local_size <= (63 * sizeof(sljit_sw))) {
+               FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
+                       | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
+               FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
+               offs = (local_size - saved_regs_size) << (15 - 3);
+       } else {
+               offs = 0 << 15;
+               if (saved_regs_size & 0x8) {
+                       offs = 1 << 15;
+                       saved_regs_size += sizeof(sljit_sw);
+               }
+               local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
+               FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
+       }
+
+       tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+       prev = -1;
+       for (i = SLJIT_S0; i >= tmp; i--) {
+               if (prev == -1) {
+                       prev = i;
+                       continue;
+               }
+               FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+               offs += 2 << 15;
+               prev = -1;
+       }
+
+       for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+               if (prev == -1) {
+                       prev = i;
+                       continue;
+               }
+               FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+               offs += 2 << 15;
+               prev = -1;
+       }
+
+       if (prev != -1)
+               FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));
+
+       if (compiler->local_size > (63 * sizeof(sljit_sw))) {
+               /* The local_size is already adjusted by the saved registers. */
+               if (local_size > 0xfff) {
+                       FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+                       local_size &= 0xfff;
+               }
+               if (local_size)
+                       FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
+               FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
+                       | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
+               FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
+       }
+
+       if (args >= 1)
+               FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+       if (args >= 2)
+               FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1)));
+       if (args >= 3)
+               FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
+
+       return SLJIT_SUCCESS;
+}
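+/* Illustrative note (not upstream): for small frames (local_size up to
+   63 machine words) the entry code above allocates locals and stores the
+   FP/LR pair with a single pre-indexed STP; for larger frames the saved
+   registers are stored first and the remaining local area is carved out
+   with one or two additional SUBI instructions. */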
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
+       local_size = (local_size + 15) & ~0xf;
+       compiler->local_size = local_size;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si local_size;
+       sljit_si i, tmp, offs, prev, saved_regs_size;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       local_size = compiler->local_size;
+
+       saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
+       if (local_size <= (63 * sizeof(sljit_sw)))
+               offs = (local_size - saved_regs_size) << (15 - 3);
+       else {
+               FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
+                       | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
+               offs = 0 << 15;
+               if (saved_regs_size & 0x8) {
+                       offs = 1 << 15;
+                       saved_regs_size += sizeof(sljit_sw);
+               }
+               local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
+               if (local_size > 0xfff) {
+                       FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+                       local_size &= 0xfff;
+               }
+               if (local_size)
+                       FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
+       }
+
+       tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+       prev = -1;
+       for (i = SLJIT_S0; i >= tmp; i--) {
+               if (prev == -1) {
+                       prev = i;
+                       continue;
+               }
+               FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+               offs += 2 << 15;
+               prev = -1;
+       }
+
+       for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+               if (prev == -1) {
+                       prev = i;
+                       continue;
+               }
+               FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+               offs += 2 << 15;
+               prev = -1;
+       }
+
+       if (prev != -1)
+               FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));
+
+       if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
+               FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
+                       | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
+       } else {
+               FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
+       }
+
+       FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       sljit_ins inv_bits = (op & SLJIT_INT_OP) ? (1 << 31) : 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op0(compiler, op));
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               return push_inst(compiler, BRK);
+       case SLJIT_NOP:
+               return push_inst(compiler, NOP);
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
+               FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+               FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
+               return push_inst(compiler, (op == SLJIT_LUMUL ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
+       case SLJIT_LUDIV:
+       case SLJIT_LSDIV:
+               FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+               FAIL_IF(push_inst(compiler, ((op == SLJIT_LUDIV ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
+               FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
+               return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
+       }
+
+       return SLJIT_SUCCESS;
+}
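+/* Illustrative note (not upstream): ARM-64 has no remainder instruction,
+   so the LUDIV/LSDIV sequence above saves the dividend, divides, then
+   computes R1 = saved_R0 - (R0 / R1) * R1 via MADD and SUB, leaving the
+   quotient in R0 and the remainder in R1. */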
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r, flags, mem_flags;
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+
+       op = GET_OPCODE(op);
+       if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+               switch (op) {
+               case SLJIT_MOV:
+               case SLJIT_MOV_P:
+                       flags = WORD_SIZE;
+                       break;
+               case SLJIT_MOV_UB:
+                       flags = BYTE_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOV_SB:
+                       flags = BYTE_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOV_UH:
+                       flags = HALF_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOV_SH:
+                       flags = HALF_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               case SLJIT_MOV_UI:
+                       flags = INT_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ui)srcw;
+                       break;
+               case SLJIT_MOV_SI:
+                       flags = INT_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_si)srcw;
+                       break;
+               case SLJIT_MOVU:
+               case SLJIT_MOVU_P:
+                       flags = WORD_SIZE | UPDATE;
+                       break;
+               case SLJIT_MOVU_UB:
+                       flags = BYTE_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOVU_SB:
+                       flags = BYTE_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOVU_UH:
+                       flags = HALF_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOVU_SH:
+                       flags = HALF_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               case SLJIT_MOVU_UI:
+                       flags = INT_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ui)srcw;
+                       break;
+               case SLJIT_MOVU_SI:
+                       flags = INT_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_si)srcw;
+                       break;
+               default:
+                       SLJIT_ASSERT_STOP();
+                       flags = 0;
+                       break;
+               }
+
+               if (src & SLJIT_IMM)
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
+               else if (src & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
+                               FAIL_IF(compiler->error);
+                       else
+                               FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
+               } else {
+                       if (dst_r != TMP_REG1)
+                               return emit_op_imm(compiler, op | ((op_flags & SLJIT_INT_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
+                       dst_r = src;
+               }
+
+               if (dst & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
+                               return compiler->error;
+                       else
+                               return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0;
+       mem_flags = WORD_SIZE;
+       if (op_flags & SLJIT_INT_OP) {
+               flags |= INT_OP;
+               mem_flags = INT_SIZE;
+       }
+
+       if (dst == SLJIT_UNUSED)
+               flags |= UNUSED_RETURN;
+
+       if (src & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw))
+                       FAIL_IF(compiler->error);
+               else
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw));
+               src = TMP_REG2;
+       }
+
+       if (src & SLJIT_IMM) {
+               flags |= ARG2_IMM;
+               if (op_flags & SLJIT_INT_OP)
+                       srcw = (sljit_si)srcw;
+       } else
+               srcw = src;
+
+       emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
+
+       if (dst & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw))
+                       return compiler->error;
+               else
+                       return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0);
+       }
+       return SLJIT_SUCCESS;
+}
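+
+/* Overview (as read from the code above): the MOV family is decoded into
+   size/sign flags (BYTE_SIZE, HALF_SIZE, INT_SIZE, plus SIGNED and UPDATE)
+   so loads and stores can share the getput_arg fast/slow paths, and
+   immediate sources are pre-truncated to the operand width (e.g.
+   (sljit_ub)srcw for SLJIT_MOV_UB). All other unary opcodes funnel through
+   emit_op_imm, with SET_FLAGS and INT_OP derived from the caller's op bits. */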
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, flags, mem_flags;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+       mem_flags = WORD_SIZE;
+       if (op & SLJIT_INT_OP) {
+               flags |= INT_OP;
+               mem_flags = INT_SIZE;
+       }
+
+       if (dst == SLJIT_UNUSED)
+               flags |= UNUSED_RETURN;
+
+       if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw))
+               flags |= SLOW_DEST;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+       }
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
+
+       if (src1 & SLJIT_MEM)
+               src1 = TMP_REG1;
+       if (src2 & SLJIT_MEM)
+               src2 = TMP_REG2;
+
+       if (src1 & SLJIT_IMM)
+               flags |= ARG1_IMM;
+       else
+               src1w = src1;
+       if (src2 & SLJIT_IMM)
+               flags |= ARG2_IMM;
+       else
+               src2w = src2;
+
+       emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
+
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
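+
+/* Overview (as read from the code above): when both sources live in memory
+   and neither fits the fast addressing form, the two getput_arg calls are
+   ordered so the second load can reuse the base cached by the first
+   (can_cache picks the profitable pairing, using the destination address as
+   a tie-breaker). SLOW_DEST, probed up front with ARG_TEST, marks a
+   destination whose store will also need the slow path. */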
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+       return push_inst(compiler, *(sljit_ins*)instruction);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#else
+       /* Available by default. */
+       return 1;
+#endif
+}
+
+static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_ui shift = MEM_SIZE_SHIFT(flags);
+       sljit_ins ins_bits = (shift << 30);
+       sljit_si other_r;
+       sljit_sw diff;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (!(flags & STORE))
+               ins_bits |= 1 << 22;
+
+       if (arg & OFFS_REG_MASK) {
+               argw &= 3;
+               if (!argw || argw == shift)
+                       return push_inst(compiler, STR_FR | ins_bits | VT(reg)
+                               | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
+               other_r = OFFS_REG(arg);
+               arg &= REG_MASK;
+               FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10)));
+               arg = TMP_REG1;
+               argw = 0;
+       }
+
+       arg &= REG_MASK;
+       if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0)
+               return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift)));
+
+       if (arg && argw <= 255 && argw >= -256)
+               return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
+
+       /* Slow cases */
+       if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) {
+               diff = argw - compiler->cache_argw;
+               if (!arg && diff <= 255 && diff >= -256)
+                       return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+               }
+       }
+
+       if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) {
+               compiler->cache_arg = SLJIT_MEM;
+               compiler->cache_argw = argw;
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+       }
+
+       if (arg & REG_MASK)
+               return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3));
+       return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3));
+}
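+
+/* Addressing summary for emit_fop_mem (as read from the code above): it
+   tries a register-offset form (STR_FR, optionally pre-scaled), then a
+   scaled unsigned 12-bit immediate (STR_FI), then an unscaled signed 9-bit
+   immediate (STUR_FI), and only then materializes the address in TMP_REG3,
+   reusing a previously cached base/offset when possible. Bit 22 selects
+   load vs. store and the size shift lands in bits 31:30, matching the
+   AArch64 LDR/STR (SIMD&FP) encodings. */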
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+       if (GET_OPCODE(op) == SLJIT_CONVI_FROMD)
+               inv_bits |= (1 << 31);
+
+       if (src & SLJIT_MEM) {
+               emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
+               src = TMP_FREG1;
+       }
+
+       FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
+
+       if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+               return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+       sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+       if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+               inv_bits |= (1 << 31);
+
+       if (src & SLJIT_MEM) {
+               emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw);
+               src = TMP_REG1;
+       } else if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+                       srcw = (sljit_si)srcw;
+#endif
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+       }
+
+       FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
+
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
+       sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+       if (src1 & SLJIT_MEM) {
+               emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+               src1 = TMP_FREG1;
+       }
+
+       if (src2 & SLJIT_MEM) {
+               emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+               src2 = TMP_FREG2;
+       }
+
+       return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
+       sljit_ins inv_bits;
+
+       CHECK_ERROR();
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference);
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+       inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (src & SLJIT_MEM) {
+               emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw);
+               src = dst_r;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DMOV:
+               if (src != dst_r) {
+                       if (dst_r != TMP_FREG1)
+                               FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
+                       else
+                               dst_r = src;
+               }
+               break;
+       case SLJIT_DNEG:
+               FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
+               break;
+       case SLJIT_DABS:
+               FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
+               break;
+       case SLJIT_CONVD_FROMS:
+               FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
+               break;
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
+       return SLJIT_SUCCESS;
+}
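+
+/* Note: for SLJIT_CONVD_FROMS the source width is the opposite of the one
+   mem_flags describes, so the load above flips it with ^ 0x100; the
+   SLJIT_COMPILE_ASSERT at the top guarantees INT_SIZE and WORD_SIZE differ
+   in exactly that bit. */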
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
+       sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+       if (src1 & SLJIT_MEM) {
+               emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+               src1 = TMP_FREG1;
+       }
+       if (src2 & SLJIT_MEM) {
+               emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+               src2 = TMP_FREG2;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DADD:
+               FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       case SLJIT_DSUB:
+               FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       case SLJIT_DMUL:
+               FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       case SLJIT_DDIV:
+               FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
+               break;
+       }
+
+       if (!(dst & SLJIT_MEM))
+               return SLJIT_SUCCESS;
+       return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
+
+       /* Memory. */
+       return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, TMP_LR, srcw));
+
+       return push_inst(compiler, RET | RN(TMP_LR));
+}
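+
+/* Note: on AArch64, "MOV Xd, Xm" is an alias of "ORR Xd, XZR, Xm", which is
+   why the fast enter/return pair shuffle the link register with ORR against
+   TMP_ZERO before the final RET. */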
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+static sljit_uw get_cc(sljit_si type)
+{
+       switch (type) {
+       case SLJIT_EQUAL:
+       case SLJIT_MUL_NOT_OVERFLOW:
+       case SLJIT_D_EQUAL:
+               return 0x1;
+
+       case SLJIT_NOT_EQUAL:
+       case SLJIT_MUL_OVERFLOW:
+       case SLJIT_D_NOT_EQUAL:
+               return 0x0;
+
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
+               return 0x2;
+
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
+               return 0x3;
+
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
+               return 0x9;
+
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               return 0x8;
+
+       case SLJIT_SIG_LESS:
+               return 0xa;
+
+       case SLJIT_SIG_GREATER_EQUAL:
+               return 0xb;
+
+       case SLJIT_SIG_GREATER:
+               return 0xd;
+
+       case SLJIT_SIG_LESS_EQUAL:
+               return 0xc;
+
+       case SLJIT_OVERFLOW:
+       case SLJIT_D_UNORDERED:
+               return 0x7;
+
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_D_ORDERED:
+               return 0x6;
+
+       default:
+               SLJIT_ASSERT_STOP();
+               return 0xe;
+       }
+}
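+
+/* Note (as read from the table above): get_cc returns the AArch64 condition
+   code for the *opposite* of the requested sljit condition (SLJIT_EQUAL maps
+   to 0x1, which is NE; SLJIT_LESS to 0x2, which is CS; and so on). Callers
+   rely on this: sljit_emit_jump branches over the far-jump sequence with the
+   inverted condition, and sljit_emit_op_flags feeds it to CSINC with two
+   zero registers, which yields 1 exactly when the original condition holds. */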
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_label(compiler));
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       return label;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       if (type < SLJIT_JUMP) {
+               jump->flags |= IS_COND;
+               PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type)));
+       }
+       else if (type >= SLJIT_FAST_CALL)
+               jump->flags |= IS_BL;
+
+       PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));
+
+       return jump;
+}
+
+static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+       sljit_ins inv_bits = (type & SLJIT_INT_OP) ? (1 << 31) : 0;
+
+       SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       jump->flags |= IS_CBZ | IS_COND;
+
+       if (src & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw));
+               src = TMP_REG1;
+       }
+       else if (src & SLJIT_IMM) {
+               PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+       }
+       SLJIT_ASSERT(FAST_IS_REG(src));
+
+       if ((type & 0xff) == SLJIT_EQUAL)
+               inv_bits |= 1 << 24;
+
+       PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
+       PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1)));
+       return jump;
+}
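+
+/* Note (as read from the code above): the XOR against CBZ selects the exact
+   compare-and-branch form: bit 31 toggles the 64-bit vs. 32-bit variant
+   (SLJIT_INT_OP) and bit 24 turns CBZ into CBNZ, so a SLJIT_EQUAL jump
+   branches over the far-jump sequence when the register is non-zero. The
+   (6 << 5) displacement skips the constant-load instructions plus the
+   final BR. */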
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       /* In ARM, we don't need to touch the arguments. */
+       if (!(src & SLJIT_IMM)) {
+               if (src & SLJIT_MEM) {
+                       FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw));
+                       src = TMP_REG1;
+               }
+               return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       FAIL_IF(!jump);
+       set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
+       jump->u.target = srcw;
+
+       FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
+       jump->addr = compiler->size;
+       return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si dst_r, flags, mem_flags;
+       sljit_ins cc;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       cc = get_cc(type & 0xff);
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       if (GET_OPCODE(op) < SLJIT_ADD) {
+               FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
+               if (dst_r != TMP_REG1)
+                       return SLJIT_SUCCESS;
+               return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw);
+       }
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+       mem_flags = WORD_SIZE;
+       if (op & SLJIT_INT_OP) {
+               flags |= INT_OP;
+               mem_flags = INT_SIZE;
+       }
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       } else if (src & SLJIT_IMM)
+               flags |= ARG1_IMM;
+
+       FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
+       emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2);
+
+       if (dst_r != TMP_REG1)
+               return SLJIT_SUCCESS;
+       return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si dst_r;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
+       return const_;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins* inst = (sljit_ins*)addr;
+       modify_imm64_const(inst, new_addr);
+       SLJIT_CACHE_FLUSH(inst, inst + 4);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins* inst = (sljit_ins*)addr;
+       modify_imm64_const(inst, new_constant);
+       SLJIT_CACHE_FLUSH(inst, inst + 4);
+}
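+
+/* A minimal usage sketch for the patching entry points above ('compiler' and
+   the surrounding workflow are assumed, as in any sljit use;
+   sljit_get_const_addr comes from sljitLir.h):
+
+       struct sljit_const *c = sljit_emit_const(compiler, SLJIT_R0, 0, 42);
+       void *code = sljit_generate_code(compiler);
+       sljit_set_const(sljit_get_const_addr(c), 1234);
+
+   Both entry points rewrite the four-instruction MOVZ/MOVK sequence built by
+   emit_imm64_const and flush the affected cache lines, so generated code
+   observes the new constant or jump target. */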
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c
new file mode 100644 (file)
index 0000000..6e38cec
--- /dev/null
@@ -0,0 +1,2058 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "ARM-Thumb2" SLJIT_CPUINFO;
+}
+
+/* Length of an instruction word. */
+typedef sljit_ui sljit_ins;
+
+/* Last register + 1. */
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_PC         (SLJIT_NUMBER_OF_REGISTERS + 5)
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+       0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15
+};
+
+#define COPY_BITS(src, from, to, bits) \
+       ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to))
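+
+/* COPY_BITS(src, from, to, bits) extracts the 'bits'-wide field of src that
+   starts at bit 'from' and re-bases it at bit 'to'. For example, the MOVW
+   encoding below uses COPY_BITS(imm, 12, 16, 4) to move imm[15:12] into the
+   imm4 field at bits [19:16]. */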
+
+/* Thumb16 encodings. */
+#define RD3(rd) (reg_map[rd])
+#define RN3(rn) (reg_map[rn] << 3)
+#define RM3(rm) (reg_map[rm] << 6)
+#define RDN3(rdn) (reg_map[rdn] << 8)
+#define IMM3(imm) (imm << 6)
+#define IMM8(imm) (imm)
+
+/* Thumb16 helpers. */
+#define SET_REGS44(rd, rn) \
+       ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4))
+#define IS_2_LO_REGS(reg1, reg2) \
+       (reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
+#define IS_3_LO_REGS(reg1, reg2, reg3) \
+       (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)
+
+/* Thumb32 encodings. */
+#define RD4(rd) (reg_map[rd] << 8)
+#define RN4(rn) (reg_map[rn] << 16)
+#define RM4(rm) (reg_map[rm])
+#define RT4(rt) (reg_map[rt] << 12)
+#define DD4(dd) ((dd) << 12)
+#define DN4(dn) ((dn) << 16)
+#define DM4(dm) (dm)
+#define IMM5(imm) \
+       (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6))
+#define IMM12(imm) \
+       (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* In the mnemonics below, a dot '.' is changed to '_', and the suffix I marks
+   the immediate form (possibly followed by the number of immediate bits). */
+#define ADCI           0xf1400000
+#define ADCS           0x4140
+#define ADC_W          0xeb400000
+#define ADD            0x4400
+#define ADDS           0x1800
+#define ADDSI3         0x1c00
+#define ADDSI8         0x3000
+#define ADD_W          0xeb000000
+#define ADDWI          0xf2000000
+#define ADD_SP         0xb000
+#define ADD_WI         0xf1000000
+#define ANDI           0xf0000000
+#define ANDS           0x4000
+#define AND_W          0xea000000
+#define ASRS           0x4100
+#define ASRSI          0x1000
+#define ASR_W          0xfa40f000
+#define ASR_WI         0xea4f0020
+#define BICI           0xf0200000
+#define BKPT           0xbe00
+#define BLX            0x4780
+#define BX             0x4700
+#define CLZ            0xfab0f080
+#define CMPI           0x2800
+#define CMP_W          0xebb00f00
+#define EORI           0xf0800000
+#define EORS           0x4040
+#define EOR_W          0xea800000
+#define IT             0xbf00
+#define LSLS           0x4080
+#define LSLSI          0x0000
+#define LSL_W          0xfa00f000
+#define LSL_WI         0xea4f0000
+#define LSRS           0x40c0
+#define LSRSI          0x0800
+#define LSR_W          0xfa20f000
+#define LSR_WI         0xea4f0010
+#define MOV            0x4600
+#define MOVS           0x0000
+#define MOVSI          0x2000
+#define MOVT           0xf2c00000
+#define MOVW           0xf2400000
+#define MOV_W          0xea4f0000
+#define MOV_WI         0xf04f0000
+#define MUL            0xfb00f000
+#define MVNS           0x43c0
+#define MVN_W          0xea6f0000
+#define MVN_WI         0xf06f0000
+#define NOP            0xbf00
+#define ORNI           0xf0600000
+#define ORRI           0xf0400000
+#define ORRS           0x4300
+#define ORR_W          0xea400000
+#define POP            0xbc00
+#define POP_W          0xe8bd0000
+#define PUSH           0xb400
+#define PUSH_W         0xe92d0000
+#define RSB_WI         0xf1c00000
+#define RSBSI          0x4240
+#define SBCI           0xf1600000
+#define SBCS           0x4180
+#define SBC_W          0xeb600000
+#define SMULL          0xfb800000
+#define STR_SP         0x9000
+#define SUBS           0x1a00
+#define SUBSI3         0x1e00
+#define SUBSI8         0x3800
+#define SUB_W          0xeba00000
+#define SUBWI          0xf2a00000
+#define SUB_SP         0xb080
+#define SUB_WI         0xf1a00000
+#define SXTB           0xb240
+#define SXTB_W         0xfa4ff080
+#define SXTH           0xb200
+#define SXTH_W         0xfa0ff080
+#define TST            0x4200
+#define UMULL          0xfba00000
+#define UXTB           0xb2c0
+#define UXTB_W         0xfa5ff080
+#define UXTH           0xb280
+#define UXTH_W         0xfa1ff080
+#define VABS_F32       0xeeb00ac0
+#define VADD_F32       0xee300a00
+#define VCMP_F32       0xeeb40a40
+#define VCVT_F32_S32   0xeeb80ac0
+#define VCVT_F64_F32   0xeeb70ac0
+#define VCVT_S32_F32   0xeebd0ac0
+#define VDIV_F32       0xee800a00
+#define VMOV_F32       0xeeb00a40
+#define VMOV           0xee000a10
+#define VMRS           0xeef1fa10
+#define VMUL_F32       0xee200a00
+#define VNEG_F32       0xeeb10a40
+#define VSTR_F32       0xed000a00
+#define VSUB_F32       0xee300a40
+
+static sljit_si push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
+{
+       sljit_uh *ptr;
+       SLJIT_ASSERT(!(inst & 0xffff0000));
+
+       ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh));
+       FAIL_IF(!ptr);
+       *ptr = inst;
+       compiler->size++;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
+{
+       sljit_uh *ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr++ = inst >> 16;
+       *ptr = inst;
+       compiler->size += 2;
+       return SLJIT_SUCCESS;
+}
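+
+/* Note: 32-bit Thumb instructions are stored as two halfwords with the most
+   significant half first, and compiler->size counts halfwords; this is why
+   push_inst32 advances size by 2 while push_inst16 advances it by 1. */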
+
+static SLJIT_INLINE sljit_si emit_imm32_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
+{
+       FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
+               COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
+       return push_inst32(compiler, MOVT | RD4(dst) |
+               COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
+}
+
+static SLJIT_INLINE void modify_imm32_const(sljit_uh *inst, sljit_uw new_imm)
+{
+       sljit_si dst = inst[1] & 0x0f00;
+       SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
+       inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1);
+       inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff);
+       inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1);
+       inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
+}
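+
+/* modify_imm32_const repatches a MOVW/MOVT pair in place: each instruction
+   carries a 16-bit immediate split as imm4:i:imm3:imm8, and the asserts
+   check that the buffer really holds a MOVW followed by a MOVT writing the
+   same register. Callers are expected to flush the instruction cache
+   afterwards, as the AArch64 patching entry points shown earlier do. */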
+
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code)
+{
+       sljit_sw diff;
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return 0;
+
+       if (jump->flags & JUMP_ADDR) {
+               /* Branch to ARM code is not optimized yet. */
+               if (!(jump->u.target & 0x1))
+                       return 0;
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1;
+       }
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1;
+       }
+
+       if (jump->flags & IS_COND) {
+               SLJIT_ASSERT(!(jump->flags & IS_BL));
+               if (diff <= 127 && diff >= -128) {
+                       jump->flags |= PATCH_TYPE1;
+                       return 5;
+               }
+               if (diff <= 524287 && diff >= -524288) {
+                       jump->flags |= PATCH_TYPE2;
+                       return 4;
+               }
+               /* The prefix IT instruction occupies one halfword, shortening the displacement by 1. */
+               diff--;
+               if (diff <= 8388607 && diff >= -8388608) {
+                       jump->flags |= PATCH_TYPE3;
+                       return 3;
+               }
+       }
+       else if (jump->flags & IS_BL) {
+               if (diff <= 8388607 && diff >= -8388608) {
+                       jump->flags |= PATCH_BL;
+                       return 3;
+               }
+       }
+       else {
+               if (diff <= 1023 && diff >= -1024) {
+                       jump->flags |= PATCH_TYPE4;
+                       return 4;
+               }
+               if (diff <= 8388607 && diff >= -8388608) {
+                       jump->flags |= PATCH_TYPE5;
+                       return 3;
+               }
+       }
+
+       return 0;
+}
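+
+/* Branch selection summary (as read from the code above): conditional jumps
+   use encoding T1 of B for 8-bit halfword displacements, T3 for up to 20
+   bits, and otherwise an IT-prefixed unconditional branch; unconditional
+   jumps use T2 (11 bits) or T4 (24 bits), and BL covers the same 24-bit
+   range. The return value is how many halfwords the pre-reserved
+   MOVW/MOVT-based jump sequence shrinks by; rewritable jumps and branches
+   to ARM (non-Thumb) targets keep the full sequence. */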
+
+static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
+{
+       sljit_si type = (jump->flags >> 4) & 0xf;
+       sljit_sw diff;
+       sljit_uh *jump_inst;
+       sljit_si s, j1, j2;
+
+       if (SLJIT_UNLIKELY(type == 0)) {
+               modify_imm32_const((sljit_uh*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
+               return;
+       }
+
+       if (jump->flags & JUMP_ADDR) {
+               SLJIT_ASSERT(jump->u.target & 0x1);
+               diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1;
+       }
+       else
+               diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1;
+       jump_inst = (sljit_uh*)jump->addr;
+
+       switch (type) {
+       case 1:
+               /* Encoding T1 of 'B' instruction */
+               SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
+               jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff);
+               return;
+       case 2:
+               /* Encoding T3 of 'B' instruction */
+               SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
+               jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1);
+               jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff);
+               return;
+       case 3:
+               SLJIT_ASSERT(jump->flags & IS_COND);
+               *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8;
+               diff--;
+               type = 5;
+               break;
+       case 4:
+               /* Encoding T2 of 'B' instruction */
+               SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
+               jump_inst[0] = 0xe000 | (diff & 0x7ff);
+               return;
+       }
+
+       SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);
+
+       /* Really complex instruction form for branches. */
+       s = (diff >> 23) & 0x1;
+       j1 = (~(diff >> 21) ^ s) & 0x1;
+       j2 = (~(diff >> 22) ^ s) & 0x1;
+       jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
+       jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
+
+       /* The others have a common form. */
+       if (type == 5) /* Encoding T4 of 'B' instruction */
+               jump_inst[1] |= 0x9000;
+       else if (type == 6) /* Encoding T1 of 'BL' instruction */
+               jump_inst[1] |= 0xd000;
+       else
+               SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_uh *code;
+       sljit_uh *code_ptr;
+       sljit_uh *buf_ptr;
+       sljit_uh *buf_end;
+       sljit_uw half_count;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_generate_code(compiler));
+       reverse_buf(compiler);
+
+       code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       half_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+
+       do {
+               buf_ptr = (sljit_uh*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 1);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       /* These structures are ordered by their address. */
+                       SLJIT_ASSERT(!label || label->size >= half_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= half_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= half_count);
+                       if (label && label->size == half_count) {
+                               label->addr = ((sljit_uw)code_ptr) | 0x1;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == half_count) {
+                               jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
+                               code_ptr -= detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == half_count) {
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr ++;
+                       half_count ++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       if (label && label->size == half_count) {
+               label->addr = ((sljit_uw)code_ptr) | 0x1;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       jump = compiler->jumps;
+       while (jump) {
+               set_jump_instruction(jump);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_uh);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       /* Set thumb mode flag. */
+       return (void*)((sljit_uw)code | 0x1);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Core code generator functions.                                       */
+/* --------------------------------------------------------------------- */
+
+#define INVALID_IMM    0x80000000
+static sljit_uw get_imm(sljit_uw imm)
+{
+       /* Thumb immediate form. */
+       sljit_si counter;
+
+       if (imm <= 0xff)
+               return imm;
+
+       if ((imm & 0xffff) == (imm >> 16)) {
+               /* Some special cases. */
+               if (!(imm & 0xff00))
+                       return (1 << 12) | (imm & 0xff);
+               if (!(imm & 0xff))
+                       return (2 << 12) | ((imm >> 8) & 0xff);
+               if ((imm & 0xff00) == ((imm & 0xff) << 8))
+                       return (3 << 12) | (imm & 0xff);
+       }
+
+       /* Assembly optimization: count leading zeroes? */
+       counter = 8;
+       if (!(imm & 0xffff0000)) {
+               counter += 16;
+               imm <<= 16;
+       }
+       if (!(imm & 0xff000000)) {
+               counter += 8;
+               imm <<= 8;
+       }
+       if (!(imm & 0xf0000000)) {
+               counter += 4;
+               imm <<= 4;
+       }
+       if (!(imm & 0xc0000000)) {
+               counter += 2;
+               imm <<= 2;
+       }
+       if (!(imm & 0x80000000)) {
+               counter += 1;
+               imm <<= 1;
+       }
+       /* Since imm >= 128, this must be true. */
+       SLJIT_ASSERT(counter <= 31);
+
+       if (imm & 0x00ffffff)
+               return INVALID_IMM; /* Cannot be encoded. */
+
+       return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
+}
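+
+/* get_imm produces the 12-bit Thumb-2 "modified immediate" encoding, or
+   INVALID_IMM when the value has no such encoding: plain 0..0xff values,
+   the repeated-byte patterns 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY, and any
+   byte with its top bit set rotated right by 'counter' positions. For
+   example, 0x2300 encodes as 0x8c rotated right by 26. */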
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
+{
+       sljit_uw tmp;
+
+       if (imm >= 0x10000) {
+               tmp = get_imm(imm);
+               if (tmp != INVALID_IMM)
+                       return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
+               tmp = get_imm(~imm);
+               if (tmp != INVALID_IMM)
+                       return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
+       }
+
+       /* Set the low 16 bits; MOVW clears the high 16 bits. */
+       FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
+               COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
+
+       /* Set the high 16 bits if needed. */
+       if (imm >= 0x10000)
+               return push_inst32(compiler, MOVT | RD4(dst) |
+                       COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
+       return SLJIT_SUCCESS;
+}
+
+#define ARG1_IMM       0x0010000
+#define ARG2_IMM       0x0020000
+#define KEEP_FLAGS     0x0040000
+/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
+#define SET_FLAGS      0x0100000
+#define UNUSED_RETURN  0x0200000
+#define SLOW_DEST      0x0400000
+#define SLOW_SRC1      0x0800000
+#define SLOW_SRC2      0x1000000
+
+static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_uw arg1, sljit_uw arg2)
+{
+       /* dst must be register, TMP_REG1
+          arg1 must be register, TMP_REG1, imm
+          arg2 must be register, TMP_REG2, imm */
+       sljit_si reg;
+       sljit_uw imm, nimm;
+
+       if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
+               /* Both are immediates. */
+               flags &= ~ARG1_IMM;
+               FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+               arg1 = TMP_REG1;
+       }
+
+       if (flags & (ARG1_IMM | ARG2_IMM)) {
+               reg = (flags & ARG2_IMM) ? arg1 : arg2;
+               imm = (flags & ARG2_IMM) ? arg2 : arg1;
+
+               switch (flags & 0xffff) {
+               case SLJIT_CLZ:
+               case SLJIT_MUL:
+                       /* No form with immediate operand. */
+                       break;
+               case SLJIT_MOV:
+                       SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
+                       return load_immediate(compiler, dst, imm);
+               case SLJIT_NOT:
+                       if (!(flags & SET_FLAGS))
+                               return load_immediate(compiler, dst, ~imm);
+                       /* Since the flags should be set, we just fall back to register mode.
+                          Although some clever things could be done here, "NOT IMM" is not worth the effort. */
+                       break;
+               case SLJIT_ADD:
+                       nimm = -imm;
+                       if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+                               if (imm <= 0x7)
+                                       return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
+                               if (nimm <= 0x7)
+                                       return push_inst16(compiler, SUBSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
+                               if (reg == dst) {
+                                       if (imm <= 0xff)
+                                               return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
+                                       if (nimm <= 0xff)
+                                               return push_inst16(compiler, SUBSI8 | IMM8(nimm) | RDN3(dst));
+                               }
+                       }
+                       if (!(flags & SET_FLAGS)) {
+                               if (imm <= 0xfff)
+                                       return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
+                               if (nimm <= 0xfff)
+                                       return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm));
+                       }
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_ADDC:
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_SUB:
+                       if (flags & ARG1_IMM) {
+                               if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst))
+                                       return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
+                               imm = get_imm(imm);
+                               if (imm != INVALID_IMM)
+                                       return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                               break;
+                       }
+                       nimm = -imm;
+                       if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+                               if (imm <= 0x7)
+                                       return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
+                               if (nimm <= 0x7)
+                                       return push_inst16(compiler, ADDSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
+                               if (reg == dst) {
+                                       if (imm <= 0xff)
+                                               return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
+                                       if (nimm <= 0xff)
+                                               return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst));
+                               }
+                               if (imm <= 0xff && (flags & UNUSED_RETURN))
+                                       return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
+                       }
+                       if (!(flags & SET_FLAGS)) {
+                               if (imm <= 0xfff)
+                                       return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
+                               if (nimm <= 0xfff)
+                                       return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm));
+                       }
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_SUBC:
+                       if (flags & ARG1_IMM)
+                               break;
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_AND:
+                       nimm = get_imm(imm);
+                       if (nimm != INVALID_IMM)
+                               return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_OR:
+                       nimm = get_imm(imm);
+                       if (nimm != INVALID_IMM)
+                               return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_XOR:
+                       imm = get_imm(imm);
+                       if (imm != INVALID_IMM)
+                               return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+                       break;
+               case SLJIT_SHL:
+               case SLJIT_LSHR:
+               case SLJIT_ASHR:
+                       if (flags & ARG1_IMM)
+                               break;
+                       imm &= 0x1f;
+                       if (imm == 0) {
+                               if (!(flags & SET_FLAGS))
+                                       return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
+                               if (IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
+                               return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
+                       }
+                       switch (flags & 0xffff) {
+                       case SLJIT_SHL:
+                               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
+                               return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
+                       case SLJIT_LSHR:
+                               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
+                               return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
+                       default: /* SLJIT_ASHR */
+                               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                                       return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
+                               return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
+                       }
+               default:
+                       SLJIT_ASSERT_STOP();
+                       break;
+               }
+
+               if (flags & ARG2_IMM) {
+                       FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
+                       arg2 = TMP_REG2;
+               }
+               else {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
+                       arg1 = TMP_REG1;
+               }
+       }
+
+       /* Both arguments are registers. */
+       switch (flags & 0xffff) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_UI:
+       case SLJIT_MOVU_SI:
+       case SLJIT_MOVU_P:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (dst == arg2)
+                       return SLJIT_SUCCESS;
+               return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
+       case SLJIT_MOV_UB:
+       case SLJIT_MOVU_UB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
+       case SLJIT_MOV_SB:
+       case SLJIT_MOVU_SB:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
+       case SLJIT_MOV_UH:
+       case SLJIT_MOVU_UH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
+       case SLJIT_MOV_SH:
+       case SLJIT_MOVU_SH:
+               SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+               if (IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
+       case SLJIT_NOT:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(arg1 == TMP_REG1);
+               FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
+               if (flags & SET_FLAGS) {
+                       if (reg_map[dst] <= 7)
+                               return push_inst16(compiler, CMPI | RDN3(dst));
+                       return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst));
+               }
+               return SLJIT_SUCCESS;
+       case SLJIT_ADD:
+               if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+                       return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
+               if (dst == arg1 && !(flags & SET_FLAGS))
+                       return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
+               return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_ADDC:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_SUB:
+               if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+                       return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
+               return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_SUBC:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_MUL:
+               if (!(flags & SET_FLAGS))
+                       return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
+               SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2);
+               FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
+               /* cmp TMP_REG2, dst asr #31. */
+               return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
+       case SLJIT_AND:
+               if (!(flags & KEEP_FLAGS)) {
+                       if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
+                               return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
+                       if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
+                               return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
+               }
+               return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_OR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_XOR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_SHL:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_LSHR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       case SLJIT_ASHR:
+               if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+                       return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
+               return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+#define STORE          0x01
+#define SIGNED         0x02
+
+#define WORD_SIZE      0x00
+#define BYTE_SIZE      0x04
+#define HALF_SIZE      0x08
+
+#define UPDATE         0x10
+#define ARG_TEST       0x20
+
+#define IS_WORD_SIZE(flags)            (!(flags & (BYTE_SIZE | HALF_SIZE)))
+#define OFFSET_CHECK(imm, shift)       (!(argw & ~(imm << shift)))
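+
+/* For example, OFFSET_CHECK(0x1f, 2) accepts word-aligned offsets up to
+   0x7c (0x1f << 2): any bit of argw outside that shifted mask fails the
+   test (explanatory note, not in the upstream sources). */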
+
+/*
+  1st letter:
+  w = word
+  b = byte
+  h = half
+
+  2nd letter:
+  s = signed
+  u = unsigned
+
+  3rd letter:
+  l = load
+  s = store
+*/
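+
+/* A worked example of the indexing (explanatory note, not part of the
+   upstream sources): the tables below are indexed with the low bits of
+   the access flags, STORE (0x01) | SIGNED (0x02) | BYTE_SIZE (0x04) |
+   HALF_SIZE (0x08). A signed byte load is SIGNED | BYTE_SIZE = 6 and
+   selects "b s l" (ldrsb); a half-word store is STORE | HALF_SIZE = 9
+   and selects "h u s" (strh). */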
+
+static SLJIT_CONST sljit_ins sljit_mem16[12] = {
+/* w u l */ 0x5800 /* ldr */,
+/* w u s */ 0x5000 /* str */,
+/* w s l */ 0x5800 /* ldr */,
+/* w s s */ 0x5000 /* str */,
+
+/* b u l */ 0x5c00 /* ldrb */,
+/* b u s */ 0x5400 /* strb */,
+/* b s l */ 0x5600 /* ldrsb */,
+/* b s s */ 0x5400 /* strb */,
+
+/* h u l */ 0x5a00 /* ldrh */,
+/* h u s */ 0x5200 /* strh */,
+/* h s l */ 0x5e00 /* ldrsh */,
+/* h s s */ 0x5200 /* strh */,
+};
+
+static SLJIT_CONST sljit_ins sljit_mem16_imm5[12] = {
+/* w u l */ 0x6800 /* ldr imm5 */,
+/* w u s */ 0x6000 /* str imm5 */,
+/* w s l */ 0x6800 /* ldr imm5 */,
+/* w s s */ 0x6000 /* str imm5 */,
+
+/* b u l */ 0x7800 /* ldrb imm5 */,
+/* b u s */ 0x7000 /* strb imm5 */,
+/* b s l */ 0x0000 /* not allowed */,
+/* b s s */ 0x7000 /* strb imm5 */,
+
+/* h u l */ 0x8800 /* ldrh imm5 */,
+/* h u s */ 0x8000 /* strh imm5 */,
+/* h s l */ 0x0000 /* not allowed */,
+/* h s s */ 0x8000 /* strh imm5 */,
+};
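+
+/* The two zero ("not allowed") entries reflect a Thumb-1 limitation:
+   ldrsb/ldrsh have no immediate-offset 16-bit encoding, which is why
+   getput_arg_fast below checks sljit_mem16_imm5[flags] before taking
+   the 16-bit path (explanatory note). */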
+
+#define MEM_IMM8       0xc00
+#define MEM_IMM12      0x800000
+static SLJIT_CONST sljit_ins sljit_mem32[12] = {
+/* w u l */ 0xf8500000 /* ldr.w */,
+/* w u s */ 0xf8400000 /* str.w */,
+/* w s l */ 0xf8500000 /* ldr.w */,
+/* w s s */ 0xf8400000 /* str.w */,
+
+/* b u l */ 0xf8100000 /* ldrb.w */,
+/* b u s */ 0xf8000000 /* strb.w */,
+/* b s l */ 0xf9100000 /* ldrsb.w */,
+/* b s s */ 0xf8000000 /* strb.w */,
+
+/* h u l */ 0xf8300000 /* ldrh.w */,
+/* h u s */ 0xf8200000 /* strh.w */,
+/* h s l */ 0xf9300000 /* ldrsh.w */,
+/* h s s */ 0xf8200000 /* strh.w */,
+};
+
+/* Helper function: sets dst to reg + value using at most one instruction; the condition flags are left untouched. */
+static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
+{
+       if (value >= 0) {
+               if (value <= 0xfff)
+                       return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
+               value = get_imm(value);
+               if (value != INVALID_IMM)
+                       return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value);
+       }
+       else {
+               value = -value;
+               if (value <= 0xfff)
+                       return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
+               value = get_imm(value);
+               if (value != INVALID_IMM)
+                       return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value);
+       }
+       return SLJIT_ERR_UNSUPPORTED;
+}
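+
+/* Illustrative only: emit_set_delta(compiler, dst, reg, 4) becomes a single
+   ADDW dst, reg, #4, and a delta of -8 a single SUBW dst, reg, #8; a value
+   that fits neither the 12-bit immediate nor the Thumb-2 modified-immediate
+   form is reported as SLJIT_ERR_UNSUPPORTED so callers can fall back to a
+   longer sequence. */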
+
+/* Tries to emit the memory access with a single instruction. Returns -1 if
+   an instruction was emitted, 1 if ARG_TEST is set and a single instruction
+   would suffice, and 0 if the slow path (getput_arg) is required. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_si other_r, shift;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (SLJIT_UNLIKELY(flags & UPDATE)) {
+               if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) {
+                       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                               return 1;
+
+                       flags &= ~UPDATE;
+                       arg &= 0xf;
+                       if (argw >= 0)
+                               argw |= 0x200;
+                       else {
+                               argw = -argw;
+                       }
+
+                       SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
+                       FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
+                       return -1;
+               }
+               return 0;
+       }
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+
+               argw &= 0x3;
+               other_r = OFFS_REG(arg);
+               arg &= 0xf;
+
+               if (!argw && IS_3_LO_REGS(reg, arg, other_r))
+                       FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
+               else
+                       FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
+               return -1;
+       }
+
+       if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff)
+               return 0;
+
+       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+               return 1;
+
+       arg &= 0xf;
+       if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
+               shift = 3;
+               if (IS_WORD_SIZE(flags)) {
+                       if (OFFSET_CHECK(0x1f, 2))
+                               shift = 2;
+               }
+               else if (flags & BYTE_SIZE)
+               {
+                       if (OFFSET_CHECK(0x1f, 0))
+                               shift = 0;
+               }
+               else {
+                       SLJIT_ASSERT(flags & HALF_SIZE);
+                       if (OFFSET_CHECK(0x1f, 1))
+                               shift = 1;
+               }
+
+               if (shift != 3) {
+                       FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift))));
+                       return -1;
+               }
+       }
+
+       /* SP based immediate. */
+       if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
+               FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
+               return -1;
+       }
+
+       if (argw >= 0)
+               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
+       else
+               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
+       return -1;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators, and those
+   operators always use word arguments without write-back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw diff;
+       if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
+               return 0;
+
+       if (!(arg & REG_MASK)) {
+               diff = argw - next_argw;
+               if (diff <= 0xfff && diff >= -0xfff)
+                       return 1;
+               return 0;
+       }
+
+       if (argw == next_argw)
+               return 1;
+
+       diff = argw - next_argw;
+       if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
+               return 1;
+
+       return 0;
+}
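+
+/* Example (illustrative values): loads from [r1 + 0x10] and [r1 + 0x2010]
+   share a base but the 0x2000 difference exceeds the +/-0xfff window, so
+   can_cache returns 0; with offsets 0x2010 and 0x2014 it returns 1 and
+   getput_arg can keep the materialized address in TMP_REG3 across both
+   accesses. */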
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg,
+       sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_r, other_r;
+       sljit_sw diff;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       tmp_r = (flags & STORE) ? TMP_REG3 : reg;
+
+       if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
+               /* Update only applies if a base register exists. */
+               /* There is no caching here. */
+               other_r = OFFS_REG(arg);
+               arg &= 0xf;
+               flags &= ~UPDATE;
+
+               if (!other_r) {
+                       if (!(argw & ~0xfff)) {
+                               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
+                               return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
+                       }
+
+                       if (compiler->cache_arg == SLJIT_MEM) {
+                               if (argw == compiler->cache_argw) {
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                               else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                                       FAIL_IF(compiler->error);
+                                       compiler->cache_argw = argw;
+                                       other_r = TMP_REG3;
+                                       argw = 0;
+                               }
+                       }
+
+                       if (argw) {
+                               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               other_r = TMP_REG3;
+                               argw = 0;
+                       }
+               }
+
+               argw &= 0x3;
+               if (!argw && IS_3_LO_REGS(reg, arg, other_r)) {
+                       FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
+                       return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
+               }
+               FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
+               return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6));
+       }
+       flags &= ~UPDATE;
+
+       SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
+
+       if (compiler->cache_arg == arg) {
+               diff = argw - compiler->cache_argw;
+               if (!(diff & ~0xfff))
+                       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff);
+               if (!((compiler->cache_argw - argw) & ~0xff))
+                       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
+               }
+       }
+
+       next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw);
+       arg &= 0xf;
+       if (arg && compiler->cache_arg == SLJIT_MEM) {
+               if (compiler->cache_argw == argw)
+                       return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
+               }
+       }
+
+       compiler->cache_argw = argw;
+       if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
+               FAIL_IF(compiler->error);
+               compiler->cache_arg = SLJIT_MEM | arg;
+               arg = 0;
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               compiler->cache_arg = SLJIT_MEM;
+
+               diff = argw - next_argw;
+               if (next_arg && diff <= 0xfff && diff >= -0xfff) {
+                       FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
+                       compiler->cache_arg = SLJIT_MEM | arg;
+                       arg = 0;
+               }
+       }
+
+       if (arg)
+               return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
+       return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg, argw))
+               return compiler->error;
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return compiler->error;
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
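+
+/* emit_op_mem handles a single access and resets the cache; emit_op_mem2
+   also passes the following access as a hint so getput_arg can keep a
+   shared base in TMP_REG3 (see can_cache above; descriptive note). */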
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si size, i, tmp;
+       sljit_ins push;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       push = (1 << 4);
+
+       tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--)
+               push |= 1 << reg_map[i];
+
+       for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+               push |= 1 << reg_map[i];
+
+       FAIL_IF((push & 0xff00)
+               ? push_inst32(compiler, PUSH_W | (1 << 14) | push)
+               : push_inst16(compiler, PUSH | (1 << 8) | push));
+
+       /* Stack must be aligned to 8 bytes: (LR, R4) */
+       size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+       local_size = ((size + local_size + 7) & ~7) - size;
+       compiler->local_size = local_size;
+       if (local_size > 0) {
+               if (local_size <= (127 << 2))
+                       FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
+               else
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
+       }
+
+       if (args >= 1)
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0)));
+       if (args >= 2)
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1)));
+       if (args >= 3)
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si size;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+       compiler->local_size = ((size + local_size + 7) & ~7) - size;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si i, tmp;
+       sljit_ins pop;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       if (compiler->local_size > 0) {
+               if (compiler->local_size <= (127 << 2))
+                       FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
+               else
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size));
+       }
+
+       pop = (1 << 4);
+
+       tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--)
+               pop |= 1 << reg_map[i];
+
+       for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+               pop |= 1 << reg_map[i];
+
+       return (pop & 0xff00)
+               ? push_inst32(compiler, POP_W | (1 << 15) | pop)
+               : push_inst16(compiler, POP | (1 << 8) | pop);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
+extern int __aeabi_idivmod(int numerator, int denominator);
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
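+
+/* Per the ARM EABI these helpers return the quotient in r0 and the
+   remainder in r1, and the call may clobber r2 and ip; sljit_emit_op0
+   below therefore spills those registers around the call when they hold
+   live scratch values (note based on the AEABI run-time ABI, not on this
+   patch). */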
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op0(compiler, op));
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               return push_inst16(compiler, BKPT);
+       case SLJIT_NOP:
+               return push_inst16(compiler, NOP);
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
+               return push_inst32(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL)
+                       | (reg_map[SLJIT_R1] << 8)
+                       | (reg_map[SLJIT_R0] << 12)
+                       | (reg_map[SLJIT_R0] << 16)
+                       | reg_map[SLJIT_R1]);
+       case SLJIT_LUDIV:
+       case SLJIT_LSDIV:
+               if (compiler->scratches >= 4) {
+                       FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */));
+                       FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */));
+               } else if (compiler->scratches >= 3)
+                       FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */));
+#if defined(__GNUC__)
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+                       (op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+               if (compiler->scratches >= 4) {
+                       FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */));
+                       return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */);
+               } else if (compiler->scratches >= 3)
+                       return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */);
+               return SLJIT_SUCCESS;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r, flags;
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+
+       op = GET_OPCODE(op);
+       if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+               switch (op) {
+               case SLJIT_MOV:
+               case SLJIT_MOV_UI:
+               case SLJIT_MOV_SI:
+               case SLJIT_MOV_P:
+                       flags = WORD_SIZE;
+                       break;
+               case SLJIT_MOV_UB:
+                       flags = BYTE_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOV_SB:
+                       flags = BYTE_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOV_UH:
+                       flags = HALF_SIZE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOV_SH:
+                       flags = HALF_SIZE | SIGNED;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               case SLJIT_MOVU:
+               case SLJIT_MOVU_UI:
+               case SLJIT_MOVU_SI:
+               case SLJIT_MOVU_P:
+                       flags = WORD_SIZE | UPDATE;
+                       break;
+               case SLJIT_MOVU_UB:
+                       flags = BYTE_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_ub)srcw;
+                       break;
+               case SLJIT_MOVU_SB:
+                       flags = BYTE_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sb)srcw;
+                       break;
+               case SLJIT_MOVU_UH:
+                       flags = HALF_SIZE | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_uh)srcw;
+                       break;
+               case SLJIT_MOVU_SH:
+                       flags = HALF_SIZE | SIGNED | UPDATE;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_sh)srcw;
+                       break;
+               default:
+                       SLJIT_ASSERT_STOP();
+                       flags = 0;
+                       break;
+               }
+
+               if (src & SLJIT_IMM)
+                       FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
+               else if (src & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
+                               FAIL_IF(compiler->error);
+                       else
+                               FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
+               } else {
+                       if (dst_r != TMP_REG1)
+                               return emit_op_imm(compiler, op, dst_r, TMP_REG1, src);
+                       dst_r = src;
+               }
+
+               if (dst & SLJIT_MEM) {
+                       if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
+                               return compiler->error;
+                       else
+                               return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (op == SLJIT_NEG) {
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+                       || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+               compiler->skip_checks = 1;
+#endif
+               return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
+       }
+
+       flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
+       if (src & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
+                       FAIL_IF(compiler->error);
+               else
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
+               src = TMP_REG2;
+       }
+
+       if (src & SLJIT_IMM)
+               flags |= ARG2_IMM;
+       else
+               srcw = src;
+
+       emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
+
+       if (dst & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
+                       return compiler->error;
+               else
+                       return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
+       }
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, flags;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
+
+       if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
+               flags |= SLOW_DEST;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+       }
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
+
+       if (src1 & SLJIT_MEM)
+               src1 = TMP_REG1;
+       if (src2 & SLJIT_MEM)
+               src2 = TMP_REG2;
+
+       if (src1 & SLJIT_IMM)
+               flags |= ARG1_IMM;
+       else
+               src1w = src1;
+       if (src2 & SLJIT_IMM)
+               flags |= ARG2_IMM;
+       else
+               src2w = src2;
+
+       if (dst == SLJIT_UNUSED)
+               flags |= UNUSED_RETURN;
+
+       emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
+
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
+       }
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg << 1;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+       if (size == 2)
+               return push_inst16(compiler, *(sljit_uh*)instruction);
+       return push_inst32(compiler, *(sljit_ins*)instruction);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#else
+       /* Available by default. */
+       return 1;
+#endif
+}
+
+#define FPU_LOAD (1 << 20)
+
+static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_sw tmp;
+       sljit_uw imm;
+       sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD));
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       /* Fast loads and stores. */
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
+               arg = SLJIT_MEM | TMP_REG2;
+               argw = 0;
+       }
+
+       if ((arg & REG_MASK) && (argw & 0x3) == 0) {
+               if (!(argw & ~0x3fc))
+                       return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2));
+               if (!(-argw & ~0x3fc))
+                       return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
+       }
+
+       /* Slow cases */
+       SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
+       if (compiler->cache_arg == arg) {
+               tmp = argw - compiler->cache_argw;
+               if (!(tmp & ~0x3fc))
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
+               if (!(-tmp & ~0x3fc))
+                       return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
+               if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       compiler->cache_argw = argw;
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
+               }
+       }
+
+       if (arg & REG_MASK) {
+               if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
+                       FAIL_IF(compiler->error);
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
+               }
+               imm = get_imm(argw & ~0x3fc);
+               if (imm != INVALID_IMM) {
+                       FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
+                       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
+               }
+               imm = get_imm(-argw & ~0x3fc);
+               if (imm != INVALID_IMM) {
+                       argw = -argw;
+                       FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
+                       return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
+               }
+       }
+
+       compiler->cache_arg = arg;
+       compiler->cache_argw = argw;
+
+       FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+       if (arg & REG_MASK)
+               FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK))));
+       return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
+               src = TMP_FREG1;
+       }
+
+       FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_SINGLE_OP) | DD4(TMP_FREG1) | DM4(src)));
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));
+
+       /* Store the integer value from a VFP register. */
+       return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
+       else if (src & SLJIT_MEM) {
+               /* Load the integer value into a VFP register. */
+               FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
+       }
+
+       FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(TMP_FREG1)));
+
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (src1 & SLJIT_MEM) {
+               emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
+               src1 = TMP_FREG1;
+       }
+
+       if (src2 & SLJIT_MEM) {
+               emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
+               src2 = TMP_FREG2;
+       }
+
+       FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(src1) | DM4(src2)));
+       return push_inst32(compiler, VMRS);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
+               op ^= SLJIT_SINGLE_OP;
+
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (src & SLJIT_MEM) {
+               emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw);
+               src = dst_r;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DMOV:
+               if (src != dst_r) {
+                       if (dst_r != TMP_FREG1)
+                               FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+                       else
+                               dst_r = src;
+               }
+               break;
+       case SLJIT_DNEG:
+               FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+               break;
+       case SLJIT_DABS:
+               FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+               break;
+       case SLJIT_CONVD_FROMS:
+               FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+               op ^= SLJIT_SINGLE_OP;
+               break;
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       op ^= SLJIT_SINGLE_OP;
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+       if (src1 & SLJIT_MEM) {
+               emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
+               src1 = TMP_FREG1;
+       }
+       if (src2 & SLJIT_MEM) {
+               emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
+               src2 = TMP_FREG2;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DADD:
+               FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       case SLJIT_DSUB:
+               FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       case SLJIT_DMUL:
+               FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       case SLJIT_DDIV:
+               FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
+               break;
+       }
+
+       if (!(dst & SLJIT_MEM))
+               return SLJIT_SUCCESS;
+       return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+}
+
+#undef FPU_LOAD
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));
+
+       /* Memory. */
+       if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
+               return compiler->error;
+       /* TMP_REG3 is used for caching. */
+       FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
+       else if (src & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
+                       FAIL_IF(compiler->error);
+               else {
+                       compiler->cache_arg = 0;
+                       compiler->cache_argw = 0;
+                       FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
+                       FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
+               }
+       }
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
+       return push_inst16(compiler, BLX | RN3(TMP_REG3));
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+static sljit_uw get_cc(sljit_si type)
+{
+       switch (type) {
+       case SLJIT_EQUAL:
+       case SLJIT_MUL_NOT_OVERFLOW:
+       case SLJIT_D_EQUAL:
+               return 0x0;
+
+       case SLJIT_NOT_EQUAL:
+       case SLJIT_MUL_OVERFLOW:
+       case SLJIT_D_NOT_EQUAL:
+               return 0x1;
+
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
+               return 0x3;
+
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
+               return 0x2;
+
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
+               return 0x8;
+
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               return 0x9;
+
+       case SLJIT_SIG_LESS:
+               return 0xb;
+
+       case SLJIT_SIG_GREATER_EQUAL:
+               return 0xa;
+
+       case SLJIT_SIG_GREATER:
+               return 0xc;
+
+       case SLJIT_SIG_LESS_EQUAL:
+               return 0xd;
+
+       case SLJIT_OVERFLOW:
+       case SLJIT_D_UNORDERED:
+               return 0x6;
+
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_D_ORDERED:
+               return 0x7;
+
+       default: /* SLJIT_JUMP */
+               SLJIT_ASSERT_STOP();
+               return 0xe;
+       }
+}
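+
+/* The values returned above are the standard ARM condition-code field
+   (0x0 = EQ, 0x1 = NE, 0x3 = CC/LO, 0x8 = HI, 0xb = LT, 0xe = AL, ...),
+   ready to be shifted into an IT instruction or a conditional branch
+   (explanatory note). */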
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_label(compiler));
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       return label;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins cc;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       /* In ARM, we don't need to touch the arguments. */
+       PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
+       if (type < SLJIT_JUMP) {
+               jump->flags |= IS_COND;
+               cc = get_cc(type);
+               jump->flags |= cc << 8;
+               PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+       }
+
+       jump->addr = compiler->size;
+       if (type <= SLJIT_JUMP)
+               PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
+       else {
+               jump->flags |= IS_BL;
+               PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
+       }
+
+       return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       /* In ARM, we don't need to touch the arguments. */
+       if (!(src & SLJIT_IMM)) {
+               if (FAST_IS_REG(src))
+                       return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
+
+               FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw));
+               if (type >= SLJIT_FAST_CALL)
+                       return push_inst16(compiler, BLX | RN3(TMP_REG1));
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       FAIL_IF(!jump);
+       set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
+       jump->u.target = srcw;
+
+       FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
+       jump->addr = compiler->size;
+       return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si dst_r, flags = GET_ALL_FLAGS(op);
+       sljit_ins cc, ins;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       cc = get_cc(type & 0xff);
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+
+       if (op < SLJIT_ADD) {
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
+               if (reg_map[dst_r] > 7) {
+                       FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
+                       FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
+               } else {
+                       FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
+                       FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
+               }
+               if (dst_r != TMP_REG2)
+                       return SLJIT_SUCCESS;
+               return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
+       }
+
+       ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI));
+       if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
+               /* Does not change the other bits. */
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1));
+               if (flags & SLJIT_SET_E) {
+                       /* The condition must always be set, even if the ORRI/EORI is not executed above. */
+                       if (reg_map[dst] <= 7)
+                               return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
+                       return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst));
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
+               src = TMP_REG2;
+               srcw = 0;
+       } else if (src & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+               src = TMP_REG2;
+               srcw = 0;
+       }
+
+       if (op == SLJIT_AND || src != dst_r) {
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0));
+       }
+       else {
+               FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+               FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
+       }
+
+       if (dst_r == TMP_REG2)
+               FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0));
+
+       if (flags & SLJIT_SET_E) {
+               /* The condition must always be set, even if the ANDI/ORRI/EORI above is not executed. */
+               if (reg_map[dst_r] <= 7)
+                       return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
+               return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
+       }
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si dst_r;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
+       return const_;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_uh *inst = (sljit_uh*)addr;
+       modify_imm32_const(inst, new_addr);
+       SLJIT_CACHE_FLUSH(inst, inst + 4);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_uh *inst = (sljit_uh*)addr;
+       modify_imm32_const(inst, new_constant);
+       SLJIT_CACHE_FLUSH(inst, inst + 4);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_32.c b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_32.c
new file mode 100644 (file)
index 0000000..b2b60d7
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* MIPS 32-bit arch dependent functions. */
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
+{
+       if (!(imm & ~0xffff))
+               return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       if (imm < 0 && imm >= SIMM_MIN)
+               return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));
+       return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
+}
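+
+/* Worked example (illustrative): imm = 0x12345678 takes the two-instruction
+   path, LUI dst, 0x1234 then ORI dst, dst, 0x5678; imm = 0xffff is a single
+   ORI from $zero, and a small negative value such as -4 is a single
+   ADDIU dst, $zero, -4. */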
+
+#define EMIT_LOGICAL(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
+       } \
+       else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
+       }
+
+#define EMIT_SHIFT(op_imm, op_v) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
+       } \
+       else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
+       }
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB) {
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+                               return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+#else
+                               FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+                               return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+#endif
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
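
Without the MIPS32 R1 SEB instruction, the fallback above sign-extends a byte by shifting it to the top of the word and arithmetic-shifting it back. The same trick in C, assuming an arithmetic right shift for signed values (which is what SRA guarantees in hardware):

    #include <stdint.h>

    /* SLL 24 / SRA 24: sign-extend the low byte of x. */
    static int32_t sign_extend_byte(int32_t x)
    {
        return (int32_t)((uint32_t)x << 24) >> 24;
    }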
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH) {
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+                               return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+#else
+                               FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+                               return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+#endif
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
+#else
+               if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+                       FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+                       return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+               }
+               /* Nearly all instructions are unmovable in the following sequence. */
+               FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+               /* Check zero. */
+               FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst)));
+               /* Loop for searching the highest bit. */
+               FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
+               FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
+               if (op & SLJIT_SET_E)
+                       return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
+#endif
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                               else {
+                                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+                               }
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               /* a + b >= a | b (otherwise, the carry should be set to 1). */
+               if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
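
The carry and overflow bookkeeping above rests on two identities. First, a + b = (a | b) + (a & b), so in infinite precision a + b >= a | b; a 32-bit sum therefore compares below a | b exactly when it wrapped, which is what the final SLTU tests (ULESS_FLAG holds a | b at that point). Second, signed overflow means the operands shared a sign that the result does not, which the XOR/shift tail isolates into the sign bit. The textbook form of both conditions, as a sketch:

    #include <stdint.h>

    /* Carry out of a 32-bit add: the sum wraps iff it drops below a | b. */
    static int add_carry(uint32_t a, uint32_t b)
    {
        return (uint32_t)(a + b) < (a | b);
    }

    /* Signed overflow of the same add: the operands share a sign that
       the result does not. */
    static int add_overflow(int32_t a, int32_t b)
    {
        int32_t r = (int32_t)((uint32_t)a + (uint32_t)b);
        return (~(a ^ b) & (a ^ r)) < 0;
    }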
+
+       case SLJIT_ADDC:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                               else {
+                                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                               }
+                       }
+                       FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+
+               FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               if (!(op & SLJIT_SET_C))
+                       return SLJIT_SUCCESS;
+
+               /* Set ULESS_FLAG if (dst == 0) && (ULESS_FLAG == 1). */
+               FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               /* Set carry flag. */
+               return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
+
+       case SLJIT_SUB:
+               if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       if (op & SLJIT_SET_U)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
+                       if (op & SLJIT_SET_S) {
+                               FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
+                               FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+                               FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
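
The SLJIT_SET_O tail of SLJIT_SUB is the subtraction analogue: overflow occurs when the operands have different signs and the result's sign differs from src1's. A portable restatement of the condition the XOR sequence above computes:

    #include <stdint.h>

    /* Signed overflow of a - b. The SLTU above separately records the
       unsigned borrow (a < b) in ULESS_FLAG. */
    static int sub_overflow(int32_t a, int32_t b)
    {
        int32_t r = (int32_t)((uint32_t)a - (uint32_t)b);
        return ((a ^ b) & (a ^ r)) < 0;
    }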
+
+       case SLJIT_SUBC:
+               if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
+
+               FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+
+       case SLJIT_MUL:
+               SLJIT_ASSERT(!(flags & SRC2_IMM));
+               if (!(op & SLJIT_SET_O)) {
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+                       return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
+#else
+                       FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
+                       return push_inst(compiler, MFLO | D(dst), DR(dst));
+#endif
+               }
+               FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
+               FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
+               FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+               FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
+               return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+
+       case SLJIT_AND:
+               EMIT_LOGICAL(ANDI, AND);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_OR:
+               EMIT_LOGICAL(ORI, OR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_XOR:
+               EMIT_LOGICAL(XORI, XOR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_SHL:
+               EMIT_SHIFT(SLL, SLLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT(SRL, SRLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT(SRA, SRAV);
+               return SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
+{
+       FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst)));
+       return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
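
emit_const always emits the full LUI/ORI pair, even when one half of the initial value is zero, so that sljit_set_const and sljit_set_jump_addr can later rewrite both 16-bit immediate fields in place without changing the code size. The patch step, minus the cache flush, as a standalone sketch:

    #include <stdint.h>

    /* Rewrite the immediates of a LUI/ORI pair, as the patchers above do. */
    static void patch_pair(uint32_t inst[2], uint32_t value)
    {
        inst[0] = (inst[0] & 0xffff0000u) | (value >> 16);    /* LUI half */
        inst[1] = (inst[1] & 0xffff0000u) | (value & 0xffff); /* ORI half */
    }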
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_64.c b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_64.c
new file mode 100644
index 0000000..185fb57
--- /dev/null
+++ b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_64.c
@@ -0,0 +1,469 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* MIPS 64-bit architecture-dependent functions. */
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
+{
+       sljit_si shift = 32;
+       sljit_si shift2;
+       sljit_si inv = 0;
+       sljit_ins ins;
+       sljit_uw uimm;
+
+       if (!(imm & ~0xffff))
+               return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       if (imm < 0 && imm >= SIMM_MIN)
+               return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
+
+       if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
+               FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));
+               return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
+       }
+
+       /* Normalize to a zero-extended, non-negative value. */
+       uimm = imm;
+       if (imm < 0) {
+               uimm = ~imm;
+               inv = 1;
+       }
+
+       while (!(uimm & 0xff00000000000000l)) {
+               shift -= 8;
+               uimm <<= 8;
+       }
+
+       if (!(uimm & 0xf000000000000000l)) {
+               shift -= 4;
+               uimm <<= 4;
+       }
+
+       if (!(uimm & 0xc000000000000000l)) {
+               shift -= 2;
+               uimm <<= 2;
+       }
+
+       if ((sljit_sw)uimm < 0) {
+               uimm >>= 1;
+               shift += 1;
+       }
+       SLJIT_ASSERT(((uimm & 0xc000000000000000l) == 0x4000000000000000l) && (shift > 0) && (shift <= 32));
+
+       if (inv)
+               uimm = ~uimm;
+
+       FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(uimm >> 48), dst_ar));
+       if (uimm & 0x0000ffff00000000l)
+               FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 32), dst_ar));
+
+       imm &= (1l << shift) - 1;
+       if (!(imm & ~0xffff)) {
+               ins = (shift == 32) ? DSLL32 : DSLL;
+               if (shift < 32)
+                       ins |= SH_IMM(shift);
+               FAIL_IF(push_inst(compiler, ins | TA(dst_ar) | DA(dst_ar), dst_ar));
+               return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
+       }
+
+       /* Double shifts need to be performed. */
+       uimm <<= 32;
+       shift2 = shift - 16;
+
+       while (!(uimm & 0xf000000000000000l)) {
+               shift2 -= 4;
+               uimm <<= 4;
+       }
+
+       if (!(uimm & 0xc000000000000000l)) {
+               shift2 -= 2;
+               uimm <<= 2;
+       }
+
+       if (!(uimm & 0x8000000000000000l)) {
+               shift2--;
+               uimm <<= 1;
+       }
+
+       SLJIT_ASSERT((uimm & 0x8000000000000000l) && (shift2 > 0) && (shift2 <= 16));
+
+       FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift - shift2), dst_ar));
+       FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 48), dst_ar));
+       FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift2), dst_ar));
+
+       imm &= (1l << shift2) - 1;
+       return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
+}
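
The shift bookkeeping above exists to beat the worst case: a fully general 64-bit constant needs six instructions (LUI, ORI, DSLL, ORI, DSLL, ORI -- the sequence emit_const below always emits), while values whose significant bits cluster can be built from fewer pieces. The baseline chunking, for reference:

    #include <stdint.h>

    /* The four 16-bit chunks the six-instruction fallback combines. */
    static void chunk64(int64_t imm, uint16_t out[4])
    {
        out[0] = (uint16_t)(imm >> 48);   /* LUI               */
        out[1] = (uint16_t)(imm >> 32);   /* ORI               */
        out[2] = (uint16_t)(imm >> 16);   /* ORI after DSLL 16 */
        out[3] = (uint16_t)imm;           /* ORI after DSLL 16 */
    }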
+
+#define SELECT_OP(a, b) \
+       (!(op & SLJIT_INT_OP) ? a : b)
+
+#define EMIT_LOGICAL(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
+       } \
+       else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
+       }
+
+#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
+       if (flags & SRC2_IMM) { \
+               if (src2 >= 32) { \
+                       SLJIT_ASSERT(!(op & SLJIT_INT_OP)); \
+                       ins = op_dimm32; \
+                       src2 -= 32; \
+               } \
+               else \
+                       ins = (op & SLJIT_INT_OP) ? op_imm : op_dimm; \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
+       } \
+       else { \
+               ins = (op & SLJIT_INT_OP) ? op_v : op_dv; \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
+       }
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       sljit_ins ins;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB) {
+                               FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+                               return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH) {
+                               FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+                               return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+                       }
+                       return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UI:
+               SLJIT_ASSERT(!(op & SLJIT_INT_OP));
+               FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
+               return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
+
+       case SLJIT_MOV_SI:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
+#else
+               if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+                       return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+               }
+               /* Nearly all instructions are unmovable in the following sequence. */
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+               /* Check zero. */
+               FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_INT_OP) ? 32 : 64), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
+               /* Loop for searching the highest bit. */
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
+               FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
+               if (op & SLJIT_SET_E)
+                       return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
+#endif
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                               else {
+                                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+                               }
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               /* a + b >= a | b (otherwise, the carry should be set to 1). */
+               if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+
+       case SLJIT_ADDC:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                               else {
+                                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                               }
+                       }
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+
+               FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               if (!(op & SLJIT_SET_C))
+                       return SLJIT_SUCCESS;
+
+               /* Set ULESS_FLAG if (dst == 0) && (ULESS_FLAG == 1). */
+               FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+               /* Set carry flag. */
+               return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
+
+       case SLJIT_SUB:
+               if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               if (src2 >= 0)
+                                       FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                               else
+                                       FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       }
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_O)
+                               FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                       if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+                       if (op & SLJIT_SET_U)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
+                       if (op & SLJIT_SET_S) {
+                               FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
+                               FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
+                       }
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+                               FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (!(op & SLJIT_SET_O))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+               return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+
+       case SLJIT_SUBC:
+               if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+                       FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+               }
+               else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
+
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+               return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+
+       case SLJIT_MUL:
+               SLJIT_ASSERT(!(flags & SRC2_IMM));
+               if (!(op & SLJIT_SET_O)) {
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+                       if (op & SLJIT_INT_OP)
+                               return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
+                       FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
+                       return push_inst(compiler, MFLO | D(dst), DR(dst));
+#else
+                       FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
+                       return push_inst(compiler, MFLO | D(dst), DR(dst));
+#endif
+               }
+               FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
+               FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
+               FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+               FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
+               return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+
+       case SLJIT_AND:
+               EMIT_LOGICAL(ANDI, AND);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_OR:
+               EMIT_LOGICAL(ORI, OR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_XOR:
+               EMIT_LOGICAL(XORI, XOR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_SHL:
+               EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV);
+               return SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
+{
+       FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst)));
+       FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 32), DR(dst)));
+       FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst)));
+       FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 16), DR(dst)));
+       FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst)));
+       return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff);
+       inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
+       inst[5] = (inst[5] & 0xffff0000) | (new_addr & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 6);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff);
+       inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
+       inst[5] = (inst[5] & 0xffff0000) | (new_constant & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 6);
+}
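
Because the six-word sequence interleaves two DSLL shifts (words 2 and 4) between the immediate-carrying words, the patchers above touch only words 0, 1, 3 and 5. The same update, minus the cache flush, as a sketch:

    #include <stdint.h>

    /* Rewrite the four immediate fields of the emit_const sequence. */
    static void patch_const64(uint32_t inst[6], uint64_t value)
    {
        inst[0] = (inst[0] & 0xffff0000u) | (uint32_t)((value >> 48) & 0xffff);
        inst[1] = (inst[1] & 0xffff0000u) | (uint32_t)((value >> 32) & 0xffff);
        inst[3] = (inst[3] & 0xffff0000u) | (uint32_t)((value >> 16) & 0xffff);
        inst[5] = (inst[5] & 0xffff0000u) | (uint32_t)(value & 0xffff);
    }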
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c
new file mode 100644 (file)
index 0000000..3e2c9f0
--- /dev/null
+++ b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c
@@ -0,0 +1,2135 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Latest MIPS architecture. */
+/* Automatically detect SLJIT_MIPS_R1 */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+       return "MIPS32-R1" SLJIT_CPUINFO;
+#else
+       return "MIPS64-R1" SLJIT_CPUINFO;
+#endif
+#else /* SLJIT_MIPS_R1 */
+       return "MIPS III" SLJIT_CPUINFO;
+#endif
+}
+
+/* Length of an instruction word,
+   both for MIPS-32 and MIPS-64. */
+typedef sljit_ui sljit_ins;
+
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+
+/* For position independent code, t9 must contain the function address. */
+#define PIC_ADDR_REG   TMP_REG2
+
+/* Floating point status register. */
+#define FCSR_REG       31
+/* Return address register. */
+#define RETURN_ADDR_REG        31
+
+/* Flags are kept in volatile registers. */
+#define EQUAL_FLAG     12
+/* And carry flag as well. */
+#define ULESS_FLAG     13
+#define UGREATER_FLAG  14
+#define LESS_FLAG      15
+#define GREATER_FLAG   31
+#define OVERFLOW_FLAG  1
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)
+
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+       0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+#define S(s)           (reg_map[s] << 21)
+#define T(t)           (reg_map[t] << 16)
+#define D(d)           (reg_map[d] << 11)
+/* Absolute registers. */
+#define SA(s)          ((s) << 21)
+#define TA(t)          ((t) << 16)
+#define DA(d)          ((d) << 11)
+#define FT(t)          ((t) << 16)
+#define FS(s)          ((s) << 11)
+#define FD(d)          ((d) << 6)
+#define IMM(imm)       ((imm) & 0xffff)
+#define SH_IMM(imm)    ((imm) << 6)
+
+#define DR(dr)         (reg_map[dr])
+#define HI(opcode)     ((opcode) << 26)
+#define LO(opcode)     (opcode)
+/* Floating-point format field: S = (16 << 21), D = (17 << 21). */
+#define FMT_S          (16 << 21)
+
+#define ABS_S          (HI(17) | FMT_S | LO(5))
+#define ADD_S          (HI(17) | FMT_S | LO(0))
+#define ADDIU          (HI(9))
+#define ADDU           (HI(0) | LO(33))
+#define AND            (HI(0) | LO(36))
+#define ANDI           (HI(12))
+#define B              (HI(4))
+#define BAL            (HI(1) | (17 << 16))
+#define BC1F           (HI(17) | (8 << 21))
+#define BC1T           (HI(17) | (8 << 21) | (1 << 16))
+#define BEQ            (HI(4))
+#define BGEZ           (HI(1) | (1 << 16))
+#define BGTZ           (HI(7))
+#define BLEZ           (HI(6))
+#define BLTZ           (HI(1) | (0 << 16))
+#define BNE            (HI(5))
+#define BREAK          (HI(0) | LO(13))
+#define CFC1           (HI(17) | (2 << 21))
+#define C_UN_S         (HI(17) | FMT_S | LO(49))
+#define C_UEQ_S                (HI(17) | FMT_S | LO(51))
+#define C_ULE_S                (HI(17) | FMT_S | LO(55))
+#define C_ULT_S                (HI(17) | FMT_S | LO(53))
+#define CVT_S_S                (HI(17) | FMT_S | LO(32))
+#define DADDIU         (HI(25))
+#define DADDU          (HI(0) | LO(45))
+#define DDIV           (HI(0) | LO(30))
+#define DDIVU          (HI(0) | LO(31))
+#define DIV            (HI(0) | LO(26))
+#define DIVU           (HI(0) | LO(27))
+#define DIV_S          (HI(17) | FMT_S | LO(3))
+#define DMULT          (HI(0) | LO(28))
+#define DMULTU         (HI(0) | LO(29))
+#define DSLL           (HI(0) | LO(56))
+#define DSLL32         (HI(0) | LO(60))
+#define DSLLV          (HI(0) | LO(20))
+#define DSRA           (HI(0) | LO(59))
+#define DSRA32         (HI(0) | LO(63))
+#define DSRAV          (HI(0) | LO(23))
+#define DSRL           (HI(0) | LO(58))
+#define DSRL32         (HI(0) | LO(62))
+#define DSRLV          (HI(0) | LO(22))
+#define DSUBU          (HI(0) | LO(47))
+#define J              (HI(2))
+#define JAL            (HI(3))
+#define JALR           (HI(0) | LO(9))
+#define JR             (HI(0) | LO(8))
+#define LD             (HI(55))
+#define LUI            (HI(15))
+#define LW             (HI(35))
+#define MFC1           (HI(17))
+#define MFHI           (HI(0) | LO(16))
+#define MFLO           (HI(0) | LO(18))
+#define MOV_S          (HI(17) | FMT_S | LO(6))
+#define MTC1           (HI(17) | (4 << 21))
+#define MUL_S          (HI(17) | FMT_S | LO(2))
+#define MULT           (HI(0) | LO(24))
+#define MULTU          (HI(0) | LO(25))
+#define NEG_S          (HI(17) | FMT_S | LO(7))
+#define NOP            (HI(0) | LO(0))
+#define NOR            (HI(0) | LO(39))
+#define OR             (HI(0) | LO(37))
+#define ORI            (HI(13))
+#define SD             (HI(63))
+#define SLT            (HI(0) | LO(42))
+#define SLTI           (HI(10))
+#define SLTIU          (HI(11))
+#define SLTU           (HI(0) | LO(43))
+#define SLL            (HI(0) | LO(0))
+#define SLLV           (HI(0) | LO(4))
+#define SRL            (HI(0) | LO(2))
+#define SRLV           (HI(0) | LO(6))
+#define SRA            (HI(0) | LO(3))
+#define SRAV           (HI(0) | LO(7))
+#define SUB_S          (HI(17) | FMT_S | LO(1))
+#define SUBU           (HI(0) | LO(35))
+#define SW             (HI(43))
+#define TRUNC_W_S      (HI(17) | FMT_S | LO(13))
+#define XOR            (HI(0) | LO(38))
+#define XORI           (HI(14))
+
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#define CLZ            (HI(28) | LO(32))
+#define DCLZ           (HI(28) | LO(36))
+#define MUL            (HI(28) | LO(2))
+#define SEB            (HI(31) | (16 << 6) | LO(32))
+#define SEH            (HI(31) | (24 << 6) | LO(32))
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define ADDU_W         ADDU
+#define ADDIU_W                ADDIU
+#define SLL_W          SLL
+#define SUBU_W         SUBU
+#else
+#define ADDU_W         DADDU
+#define ADDIU_W                DADDIU
+#define SLL_W          DSLL
+#define SUBU_W         DSUBU
+#endif
+
+#define SIMM_MAX       (0x7fff)
+#define SIMM_MIN       (-0x8000)
+#define UIMM_MAX       (0xffff)
+
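
These macros assemble raw MIPS instruction words: the major opcode sits in bits 31-26 (HI), the rs/rt/rd register fields in bits 25-21 / 20-16 / 15-11 (S/T/D, or the absolute SA/TA/DA), and a 16-bit immediate in the low bits. A self-checking example, using hypothetical local copies of the macros:

    #include <assert.h>
    #include <stdint.h>

    #define HI_(op) ((uint32_t)(op) << 26)
    #define SA_(s)  ((uint32_t)(s) << 21)
    #define TA_(t)  ((uint32_t)(t) << 16)
    #define IMM_(i) ((uint32_t)(i) & 0xffff)

    int main(void)
    {
        /* addiu $8, $9, 4 -- ADDIU is HI(9) in the table above. */
        uint32_t ins = HI_(9) | SA_(9) | TA_(8) | IMM_(4);
        assert(ins == 0x25280004);
        return 0;
    }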
+/* delay_slot is the absolute number of the destination register (or
+   MOVABLE_INS / UNMOVABLE_INS); it is used when reordering instructions
+   into the branch delay slot. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot)
+{
+       SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS
+               || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f));
+       sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       compiler->delay_slot = delay_slot;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_ins invert_branch(sljit_si flags)
+{
+       return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16);
+}
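
invert_branch works because each conditional-branch pair in the opcode table above differs by a single bit: BEQ/BNE (HI(4)/HI(5)) in bit 26, and the rt-field-encoded pairs such as BLTZ/BGEZ or BC1F/BC1T in bit 16. A quick check of the two encodings:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t beq  = 4u << 26, bne = 5u << 26;
        uint32_t bltz = (1u << 26) | (0u << 16);
        uint32_t bgez = (1u << 26) | (1u << 16);
        assert((beq  ^ (1u << 26)) == bne);
        assert((bltz ^ (1u << 16)) == bgez);
        return 0;
    }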
+
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_ins *inst;
+       sljit_ins saved_inst;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+       if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
+               return code_ptr;
+#else
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return code_ptr;
+#endif
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+       inst = (sljit_ins*)jump->addr;
+       if (jump->flags & IS_COND)
+               inst--;
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       if (jump->flags & IS_CALL)
+               goto keep_address;
+#endif
+
+       /* B instructions. */
+       if (jump->flags & IS_MOVABLE) {
+               diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+               if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       jump->flags |= PATCH_B;
+
+                       if (!(jump->flags & IS_COND)) {
+                               inst[0] = inst[-1];
+                               inst[-1] = (jump->flags & IS_JAL) ? BAL : B;
+                               jump->addr -= sizeof(sljit_ins);
+                               return inst;
+                       }
+                       saved_inst = inst[0];
+                       inst[0] = inst[-1];
+                       inst[-1] = saved_inst ^ invert_branch(jump->flags);
+                       jump->addr -= 2 * sizeof(sljit_ins);
+                       return inst;
+               }
+       }
+       else {
+               diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2;
+               if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       jump->flags |= PATCH_B;
+
+                       if (!(jump->flags & IS_COND)) {
+                               inst[0] = (jump->flags & IS_JAL) ? BAL : B;
+                               inst[1] = NOP;
+                               return inst + 1;
+                       }
+                       inst[0] = inst[0] ^ invert_branch(jump->flags);
+                       inst[1] = NOP;
+                       jump->addr -= sizeof(sljit_ins);
+                       return inst + 1;
+               }
+       }
+
+       if (jump->flags & IS_COND) {
+               if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       saved_inst = inst[0];
+                       inst[0] = inst[-1];
+                       inst[-1] = (saved_inst & 0xffff0000) | 3;
+                       inst[1] = J;
+                       inst[2] = NOP;
+                       return inst + 2;
+               }
+               else if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = (inst[0] & 0xffff0000) | 3;
+                       inst[1] = NOP;
+                       inst[2] = J;
+                       inst[3] = NOP;
+                       jump->addr += sizeof(sljit_ins);
+                       return inst + 3;
+               }
+       }
+       else {
+               /* J instructions. */
+               if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = inst[-1];
+                       inst[-1] = (jump->flags & IS_JAL) ? JAL : J;
+                       jump->addr -= sizeof(sljit_ins);
+                       return inst;
+               }
+
+               if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = (jump->flags & IS_JAL) ? JAL : J;
+                       inst[1] = NOP;
+                       return inst + 1;
+               }
+       }
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+keep_address:
+       if (target_addr <= 0x7fffffff) {
+               jump->flags |= PATCH_ABS32;
+               if (jump->flags & IS_COND) {
+                       inst[0] -= 4;
+                       inst++;
+               }
+               inst[2] = inst[6];
+               inst[3] = inst[7];
+               return inst + 3;
+       }
+       if (target_addr <= 0x7fffffffffffl) {
+               jump->flags |= PATCH_ABS48;
+               if (jump->flags & IS_COND) {
+                       inst[0] -= 2;
+                       inst++;
+               }
+               inst[4] = inst[6];
+               inst[5] = inst[7];
+               return inst + 5;
+       }
+#endif
+
+       return code_ptr;
+}
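
detect_jump_type applies two shortening tests: a plain branch (PATCH_B) when the word distance from the delay slot fits the signed 16-bit offset field, and an absolute J/JAL (PATCH_J) when the target lies in the same 256MB region as the instruction after the jump (J keeps the top four address bits of PC + 4). The IS_MOVABLE cases additionally pull the preceding instruction into the delay slot to save a NOP. The two reachability tests, restated as a sketch:

    #include <stdint.h>

    /* PATCH_B: signed 16-bit word offset, measured from the delay slot. */
    static int b_reachable(uintptr_t from, uintptr_t to)
    {
        intptr_t diff = ((intptr_t)to - (intptr_t)(from + 4)) >> 2;
        return diff <= 0x7fff && diff >= -0x8000;
    }

    /* PATCH_J: the 26-bit J target must share the top 4 bits of PC + 4. */
    static int j_reachable(uintptr_t from, uintptr_t to)
    {
        return (to & ~(uintptr_t)0xfffffff) == ((from + 4) & ~(uintptr_t)0xfffffff);
    }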
+
+#ifdef __GNUC__
+static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr)
+{
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+}
+#endif
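
Freshly generated instructions must be flushed from the data cache and invalidated in the instruction cache before they run; SLJIT_CACHE_FLUSH wraps whatever the platform provides, and the noinline attribute forces an out-of-line call under GCC. On GCC-compatible compilers a portable equivalent -- shown only as an assumption about one possible expansion, not necessarily what SLJIT uses on MIPS -- is the __builtin___clear_cache builtin:

    /* Sketch: flush a freshly generated code range with the GCC builtin. */
    static void flush_code_range(void *start, void *end)
    {
    #ifdef __GNUC__
        __builtin___clear_cache((char *)start, (char *)end);
    #endif
    }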
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_generate_code(compiler));
+       reverse_buf(compiler);
+
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               label->addr = (sljit_uw)code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+                               jump->addr = (sljit_uw)(code_ptr - 3);
+#else
+                               jump->addr = (sljit_uw)(code_ptr - 7);
+#endif
+                               code_ptr = detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               /* Just recording the address. */
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+			code_ptr++;
+			word_count++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+
+                       if (jump->flags & PATCH_B) {
+                               addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff);
+                               break;
+                       }
+                       if (jump->flags & PATCH_J) {
+                               SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff));
+                               buf_ptr[0] |= (addr >> 2) & 0x03ffffff;
+                               break;
+                       }
+
+                       /* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+                       buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+#else
+                       if (jump->flags & PATCH_ABS32) {
+                               SLJIT_ASSERT(addr <= 0x7fffffff);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+                       }
+                       else if (jump->flags & PATCH_ABS48) {
+                               SLJIT_ASSERT(addr <= 0x7fffffffffffl);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
+                       }
+                       else {
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                               buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[5] = (buf_ptr[5] & 0xffff0000) | (addr & 0xffff);
+                       }
+#endif
+               } while (0);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+#ifndef __GNUC__
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+#else
+       /* GCC workaround for invalid code generation with -O2. */
+       sljit_cache_flush(code, code_ptr);
+#endif
+       return code;
+}
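+
+/* A worked patching example (illustrative addresses): for a PATCH_B jump whose
+   branch sits at 0x2000 and whose label resolved to 0x2100, the immediate
+   becomes (0x2100 - 0x2004) >> 2 = 63, or-ed into the low 16 bits of the
+   branch. PATCH_J keeps the upper 4 address bits and packs
+   (addr >> 2) & 0x03ffffff into the J/JAL word; everything else is an
+   absolute immediate load patched 16 bits at a time. */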
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Creates an index into the data_transfer_insts array. */
+#define LOAD_DATA      0x01
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x02
+#define HALF_DATA      0x04
+#define INT_DATA       0x06
+#define SIGNED_DATA    0x08
+/* Separates integer and floating point data transfers */
+#define GPR_REG                0x0f
+#define DOUBLE_DATA    0x10
+#define SINGLE_DATA    0x12
+
+#define MEM_MASK       0x1f
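+
+/* For illustration, composing an index into the data_transfer_insts table
+   defined below: BYTE_DATA | SIGNED_DATA | LOAD_DATA = 0x0b selects lb, and
+   DOUBLE_DATA | LOAD_DATA = 0x11 selects ldc1; indices above GPR_REG (0x0f)
+   are the floating point transfers. */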
+
+#define WRITE_BACK     0x00020
+#define ARG_TEST       0x00040
+#define ALT_KEEP_CACHE 0x00080
+#define CUMULATIVE_OP  0x00100
+#define LOGICAL_OP     0x00200
+#define IMM_OP         0x00400
+#define SRC2_IMM       0x00800
+
+#define UNUSED_DEST    0x01000
+#define REG_DEST       0x02000
+#define REG1_SOURCE    0x04000
+#define REG2_SOURCE    0x08000
+#define SLOW_SRC1      0x10000
+#define SLOW_SRC2      0x20000
+#define SLOW_DEST      0x40000
+
+/* Only these flags are checked here; UNUSED_DEST is set only when the
+   destination is unused but status flags are still requested. */
+#define CHECK_FLAGS(list) \
+       (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
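+
+/* Roughly: CHECK_FLAGS(list) is zero -- letting the guarded instruction be
+   dropped -- only when the destination is unused and every requested status
+   flag is already in `list`; with a real destination it is always nonzero. */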
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define STACK_STORE    SW
+#define STACK_LOAD     LW
+#else
+#define STACK_STORE    SD
+#define STACK_LOAD     LD
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#include "sljitNativeMIPS_32.c"
+#else
+#include "sljitNativeMIPS_64.c"
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_ins base;
+       sljit_si i, tmp, offs;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+       local_size = (local_size + 15) & ~0xf;
+#else
+       local_size = (local_size + 31) & ~0x1f;
+#endif
+       compiler->local_size = local_size;
+
+       if (local_size <= SIMM_MAX) {
+               /* Frequent case. */
+               FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
+               base = S(SLJIT_SP);
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+               FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+               FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
+               base = S(TMP_REG2);
+               local_size = 0;
+       }
+
+       offs = local_size - (sljit_sw)(sizeof(sljit_sw));
+       FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));
+
+       tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--) {
+               offs -= (sljit_si)(sizeof(sljit_sw));
+               FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS));
+       }
+
+       for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+               offs -= (sljit_si)(sizeof(sljit_sw));
+               FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS));
+       }
+
+       if (args >= 1)
+               FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0)));
+       if (args >= 2)
+               FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1)));
+       if (args >= 3)
+               FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2)));
+
+       return SLJIT_SUCCESS;
+}
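+
+/* A rough sketch of the frame built above (stack grows downward; offsets are
+   from the new SP in the small-frame case):
+
+     local_size - 1 * wordsize : return address (RETURN_ADDR_REG)
+     local_size - 2 * wordsize : SLJIT_S0
+     ...                       : remaining saved regs, then preserved scratches
+     SLJIT_LOCALS_OFFSET       : start of user locals
+
+   Large frames take the slow path: the entry SP is kept in TMP_REG2 and the
+   stores go through it instead. */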
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+       compiler->local_size = (local_size + 15) & ~0xf;
+#else
+       compiler->local_size = (local_size + 31) & ~0x1f;
+#endif
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si local_size, i, tmp, offs;
+       sljit_ins base;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       local_size = compiler->local_size;
+       if (local_size <= SIMM_MAX)
+               base = S(SLJIT_SP);
+       else {
+               FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+               FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
+               base = S(TMP_REG1);
+               local_size = 0;
+       }
+
+       FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG));
+       offs = local_size - (sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
+
+       tmp = compiler->scratches;
+       for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
+               FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i)));
+               offs += (sljit_si)(sizeof(sljit_sw));
+       }
+
+       tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = tmp; i <= SLJIT_S0; i++) {
+               FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i)));
+               offs += (sljit_si)(sizeof(sljit_sw));
+       }
+
+       SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw)));
+
+       FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
+       if (compiler->local_size <= SIMM_MAX)
+               return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS);
+       else
+               return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS);
+}
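+
+/* Note the delay slot trick above: the stack adjustment is emitted after JR
+   and executes in its delay slot, so a small-frame epilogue ends with
+   (illustrative):
+
+     jr    $ra
+     addiu $sp, $sp, local_size   # runs before control leaves the function
+*/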
+
+#undef STACK_STORE
+#undef STACK_LOAD
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define ARCH_32_64(a, b)       a
+#else
+#define ARCH_32_64(a, b)       b
+#endif
+
+static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = {
+/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */),
+/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */),
+/* u b s */ HI(40) /* sb */,
+/* u b l */ HI(36) /* lbu */,
+/* u h s */ HI(41) /* sh */,
+/* u h l */ HI(37) /* lhu */,
+/* u i s */ HI(43) /* sw */,
+/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */),
+
+/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */),
+/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */),
+/* s b s */ HI(40) /* sb */,
+/* s b l */ HI(32) /* lb */,
+/* s h s */ HI(41) /* sh */,
+/* s h l */ HI(33) /* lh */,
+/* s i s */ HI(43) /* sw */,
+/* s i l */ HI(35) /* lw */,
+
+/* d   s */ HI(61) /* sdc1 */,
+/* d   l */ HI(53) /* ldc1 */,
+/* s   s */ HI(57) /* swc1 */,
+/* s   l */ HI(49) /* lwc1 */,
+};
+
+#undef ARCH_32_64
+
+/* reg_ar is an absolute register! */
+
+/* Can perform an operation using at most 1 instruction. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if ((!(flags & WRITE_BACK) || !(arg & REG_MASK)) && !(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) {
+		/* Works for both absolute and relative addresses. */
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+               FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & REG_MASK)
+                       | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS));
+               return -1;
+       }
+       return 0;
+}
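+
+/* E.g. a word load from SLJIT_MEM1(SLJIT_SP) with a small offset becomes a
+   single lw/ld here; indexed or out-of-range addresses return 0 and fall
+   back to getput_arg below. */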
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write-back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       /* Simple operation except for updates. */
+       if (arg & OFFS_REG_MASK) {
+               argw &= 0x3;
+               next_argw &= 0x3;
+               if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
+                       return 1;
+               return 0;
+       }
+
+       if (arg == next_arg) {
+               if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN))
+                       return 1;
+               return 0;
+       }
+
+       return 0;
+}
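+
+/* For illustration: two consecutive word loads from [REG + 0x10000] and
+   [REG + 0x10008] can share the cached base, since the delta (8) fits a
+   16-bit immediate; the second access then needs only an addiu before its
+   load. */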
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_ar, base, delay_slot;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
+               tmp_ar = reg_ar;
+               delay_slot = reg_ar;
+       } else {
+               tmp_ar = DR(TMP_REG1);
+               delay_slot = MOVABLE_INS;
+       }
+       base = arg & REG_MASK;
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3;
+               if ((flags & WRITE_BACK) && reg_ar == DR(base)) {
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
+                       FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+                       reg_ar = DR(TMP_REG1);
+               }
+
+               /* Using the cache. */
+               if (argw == compiler->cache_argw) {
+                       if (!(flags & WRITE_BACK)) {
+                               if (arg == compiler->cache_arg)
+                                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                                               compiler->cache_arg = arg;
+                                               compiler->cache_argw = argw;
+                                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
+                                               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+                                       }
+                                       FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
+                                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+                               }
+                       }
+                       else {
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
+                                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+                               }
+                       }
+               }
+
+               if (SLJIT_UNLIKELY(argw)) {
+                       compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                       compiler->cache_argw = argw;
+                       FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3)));
+               }
+
+               if (!(flags & WRITE_BACK)) {
+                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                               compiler->cache_arg = arg;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
+                               tmp_ar = DR(TMP_REG3);
+                       }
+                       else
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar));
+                       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+               }
+               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(base), DR(base)));
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+       }
+
+       if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
+               /* Update only applies if a base register exists. */
+               if (reg_ar == DR(base)) {
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
+                       if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
+                               FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS));
+                               if (argw)
+                                       return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base));
+                               return SLJIT_SUCCESS;
+                       }
+                       FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+                       reg_ar = DR(TMP_REG1);
+               }
+
+               if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
+                       if (argw)
+                               FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)));
+               }
+               else {
+                       if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
+                               if (argw != compiler->cache_argw) {
+                                       FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+                                       compiler->cache_argw = argw;
+                               }
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
+                       }
+                       else {
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
+                               FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
+                       }
+               }
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+       }
+
+       if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
+               if (argw != compiler->cache_argw) {
+                       FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+                       compiler->cache_argw = argw;
+               }
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+       }
+
+       if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
+               if (argw != compiler->cache_argw)
+                       FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+       }
+       else {
+               compiler->cache_arg = SLJIT_MEM;
+               FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
+       }
+       compiler->cache_argw = argw;
+
+       if (!base)
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+
+       if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) {
+               compiler->cache_arg = arg;
+               FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3)));
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+       }
+
+       FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar));
+       return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+}
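+
+/* Cache behaviour sketch: the first access to [REG + 0x10000] materializes
+   REG + 0x10000 in TMP_REG3; a following access to [REG + 0x10008] (as
+   predicted by can_cache) only needs an addiu of +8 on TMP_REG3 plus the
+   memory instruction. */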
+
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
+               return compiler->error;
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return compiler->error;
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* arg1 goes to TMP_REG1 or src reg
+          arg2 goes to TMP_REG2, imm or src reg
+          TMP_REG3 can be used for caching
+	   result goes to TMP_REG2, so storing the result can still use TMP_REG1 and TMP_REG3. */
+       sljit_si dst_r = TMP_REG2;
+       sljit_si src1_r;
+       sljit_sw src2_r = 0;
+       sljit_si sugg_src2_r = TMP_REG2;
+
+       if (!(flags & ALT_KEEP_CACHE)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+                       return SLJIT_SUCCESS;
+               if (GET_FLAGS(op))
+                       flags |= UNUSED_DEST;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r;
+       }
+       else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw))
+               flags |= SLOW_DEST;
+
+       if (flags & IMM_OP) {
+               if ((src2 & SLJIT_IMM) && src2w) {
+                       if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN))
+                               || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) {
+                               flags |= SRC2_IMM;
+                               src2_r = src2w;
+                       }
+               }
+               if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
+                       if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN))
+                               || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) {
+                               flags |= SRC2_IMM;
+                               src2_r = src1w;
+
+                               /* And swap arguments. */
+                               src1 = src2;
+                               src1w = src2w;
+                               src2 = SLJIT_IMM;
+				/* Updating src2w is unnecessary; src2_r already holds the value. */
+                       }
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1)) {
+               src1_r = src1;
+               flags |= REG1_SOURCE;
+       }
+       else if (src1 & SLJIT_IMM) {
+               if (src1w) {
+                       FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w));
+                       src1_r = TMP_REG1;
+               }
+               else
+                       src1_r = 0;
+       }
+       else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+               src1_r = TMP_REG1;
+       }
+
+       /* Source 2. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       dst_r = src2_r;
+       }
+       else if (src2 & SLJIT_IMM) {
+               if (!(flags & SRC2_IMM)) {
+                       if (src2w) {
+                               FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w));
+                               src2_r = sugg_src2_r;
+                       }
+                       else {
+                               src2_r = 0;
+                               if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
+                                       dst_r = 0;
+                       }
+               }
+       }
+       else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+               src2_r = sugg_src2_r;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               SLJIT_ASSERT(src2_r == TMP_REG2);
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw));
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       sljit_si int_op = op & SLJIT_INT_OP;
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op0(compiler, op));
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               return push_inst(compiler, BREAK, UNMOVABLE_INS);
+       case SLJIT_NOP:
+               return push_inst(compiler, NOP, UNMOVABLE_INS);
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+               FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+#else
+               FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+#endif
+               FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
+               return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
+       case SLJIT_LUDIV:
+       case SLJIT_LSDIV:
+#if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+               FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+               if (int_op)
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+               else
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+#else
+               FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+#endif
+
+               FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
+               return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
+       }
+
+       return SLJIT_SUCCESS;
+}
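+
+/* E.g. SLJIT_LUMUL above expands to multu R0, R1 followed by mflo/mfhi,
+   leaving the low word of the product in R0 and the high word in R1. The two
+   NOPs emitted for pre-R1 cores respect the HI/LO access hazard of early
+   MIPS implementations. */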
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define flags 0
+#else
+       sljit_si flags = 0;
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       if ((op & SLJIT_INT_OP) && GET_OPCODE(op) >= SLJIT_NOT) {
+               flags |= INT_DATA | SIGNED_DATA;
+               if (src & SLJIT_IMM)
+                       srcw = (sljit_si)srcw;
+       }
+#endif
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw);
+#endif
+
+       case SLJIT_MOV_SI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw);
+#endif
+
+       case SLJIT_MOV_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOV_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOV_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOV_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+               return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw);
+#endif
+
+       case SLJIT_MOVU_SI:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw);
+#endif
+
+       case SLJIT_MOVU_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOVU_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOVU_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOVU_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_NOT:
+               return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+               return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      undef flags
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define flags 0
+#else
+       sljit_si flags = 0;
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       if (op & SLJIT_INT_OP) {
+               flags |= INT_DATA | SIGNED_DATA;
+               if (src1 & SLJIT_IMM)
+                       src1w = (sljit_si)src1w;
+               if (src2 & SLJIT_IMM)
+                       src2w = (sljit_si)src2w;
+       }
+#endif
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+       case SLJIT_ADDC:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB:
+       case SLJIT_SUBC:
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_MUL:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+       case SLJIT_ASHR:
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+               if (src2 & SLJIT_IMM)
+                       src2w &= 0x1f;
+#else
+               if (src2 & SLJIT_IMM) {
+                       if (op & SLJIT_INT_OP)
+                               src2w &= 0x1f;
+                       else
+                               src2w &= 0x3f;
+               }
+#endif
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      undef flags
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg << 1;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+       return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#elif defined(__GNUC__)
+       sljit_sw fir;
+       asm ("cfc1 %0, $0" : "=r"(fir));
+       return (fir >> 22) & 0x1;
+#else
+#error "FIR check is not implemented for this architecture"
+#endif
+}
+
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7))
+#define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8))
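+
+/* Worked example: SLJIT_SINGLE_OP is 0x100 (see the compile assert below), so
+   FLOAT_DATA yields DOUBLE_DATA (0x10) for double ops and SINGLE_DATA (0x12)
+   for single ops. FMT adjusts the fmt field (bits 25-21) of the base *_S
+   encodings: for double ops it adds 1 << 21, turning fmt S (16) into
+   fmt D (17); for single ops it adds nothing. */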
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define flags 0
+#else
+       sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVW_FROMD) << 21;
+#endif
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+               src = TMP_FREG1;
+       }
+       else
+               src <<= 1;
+
+       FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS));
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS);
+
+	/* Store the integer value from an FPU register. */
+       return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#	undef flags
+#endif
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define flags 0
+#else
+       sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVD_FROMW) << 21;
+#endif
+
+       sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
+       else if (src & SLJIT_MEM) {
+		/* Load the integer value into an FPU register. */
+               FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+       }
+       else {
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+               if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+                       srcw = (sljit_si)srcw;
+#endif
+               FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+               FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
+       }
+
+       FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      undef flags
+#endif
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+               src1 = TMP_FREG1;
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+               src2 = TMP_FREG2;
+       }
+       else
+               src2 <<= 1;
+
+       /* src2 and src1 are swapped. */
+       if (op & SLJIT_SET_E) {
+               FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
+               FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
+               FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
+       }
+       if (op & SLJIT_SET_S) {
+		/* The instructions for the two checks are interleaved. */
+               FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
+               FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
+               FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
+               FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
+               FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
+               FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
+       }
+       return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC);
+}
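+
+/* The compare instructions above set the FP condition bit (bit 23 of the
+   FCSR); each cfc1 / srl 23 / andi 1 sequence copies that bit into an
+   integer flag register so later branches can test it like any other flag. */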
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+       if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+               op ^= SLJIT_SINGLE_OP;
+
+       dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+               src = dst_r;
+       }
+       else
+               src <<= 1;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DMOV:
+               if (src != dst_r) {
+                       if (dst_r != TMP_FREG1)
+                               FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+                       else
+                               dst_r = src;
+               }
+               break;
+       case SLJIT_DNEG:
+               FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+               break;
+       case SLJIT_DABS:
+               FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+               break;
+       case SLJIT_CONVD_FROMS:
+               FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_SINGLE_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS));
+               op ^= SLJIT_SINGLE_OP;
+               break;
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, flags = 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+                       FAIL_IF(compiler->error);
+                       src1 = TMP_FREG1;
+               } else
+                       flags |= SLOW_SRC1;
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+                       FAIL_IF(compiler->error);
+                       src2 = TMP_FREG2;
+               } else
+                       flags |= SLOW_SRC2;
+       }
+       else
+               src2 <<= 1;
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+       if (flags & SLOW_SRC1)
+               src1 = TMP_FREG1;
+       if (flags & SLOW_SRC2)
+               src2 = TMP_FREG2;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DADD:
+               FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
+               break;
+
+       case SLJIT_DSUB:
+               FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
+               break;
+
+       case SLJIT_DMUL:
+               FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
+               break;
+
+       case SLJIT_DDIV:
+               FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
+               break;
+       }
+
+       if (dst_r == TMP_FREG2)
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
+
+       /* Memory. */
+       return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG));
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, RETURN_ADDR_REG, srcw));
+
+       FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
+       return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_label(compiler));
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       compiler->delay_slot = UNMOVABLE_INS;
+       return label;
+}
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define JUMP_LENGTH    4
+#else
+#define JUMP_LENGTH    8
+#endif
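+
+/* JUMP_LENGTH is the branch offset, in words from the delay slot, that skips
+   the worst-case fallback sequence emitted below: lui/ori + jr + nop = 4
+   words on MIPS32, a 6-instruction immediate load + jr + nop = 8 words on
+   MIPS64. */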
+
+#define BR_Z(src) \
+       inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \
+       flags = IS_BIT26_COND; \
+       delay_check = src;
+
+#define BR_NZ(src) \
+       inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \
+       flags = IS_BIT26_COND; \
+       delay_check = src;
+
+#define BR_T() \
+       inst = BC1T | JUMP_LENGTH; \
+       flags = IS_BIT16_COND; \
+       delay_check = FCSR_FCC;
+
+#define BR_F() \
+       inst = BC1F | JUMP_LENGTH; \
+       flags = IS_BIT16_COND; \
+       delay_check = FCSR_FCC;
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins inst;
+       sljit_si flags = 0;
+       sljit_si delay_check = UNMOVABLE_INS;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       switch (type) {
+       case SLJIT_EQUAL:
+       case SLJIT_D_NOT_EQUAL:
+               BR_NZ(EQUAL_FLAG);
+               break;
+       case SLJIT_NOT_EQUAL:
+       case SLJIT_D_EQUAL:
+               BR_Z(EQUAL_FLAG);
+               break;
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
+               BR_Z(ULESS_FLAG);
+               break;
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
+               BR_NZ(ULESS_FLAG);
+               break;
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
+               BR_Z(UGREATER_FLAG);
+               break;
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               BR_NZ(UGREATER_FLAG);
+               break;
+       case SLJIT_SIG_LESS:
+               BR_Z(LESS_FLAG);
+               break;
+       case SLJIT_SIG_GREATER_EQUAL:
+               BR_NZ(LESS_FLAG);
+               break;
+       case SLJIT_SIG_GREATER:
+               BR_Z(GREATER_FLAG);
+               break;
+       case SLJIT_SIG_LESS_EQUAL:
+               BR_NZ(GREATER_FLAG);
+               break;
+       case SLJIT_OVERFLOW:
+       case SLJIT_MUL_OVERFLOW:
+               BR_Z(OVERFLOW_FLAG);
+               break;
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_MUL_NOT_OVERFLOW:
+               BR_NZ(OVERFLOW_FLAG);
+               break;
+       case SLJIT_D_UNORDERED:
+               BR_F();
+               break;
+       case SLJIT_D_ORDERED:
+               BR_T();
+               break;
+       default:
+		/* Not a conditional branch. */
+               inst = 0;
+               break;
+       }
+
+       jump->flags |= flags;
+       if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check))
+               jump->flags |= IS_MOVABLE;
+
+       if (inst)
+               PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS));
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       if (type <= SLJIT_JUMP) {
+               PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+               jump->addr = compiler->size;
+               PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       } else {
+               SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+               /* Cannot be optimized out if type is >= CALL0. */
+               jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? IS_CALL : 0);
+               PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+               jump->addr = compiler->size;
+               /* A NOP if type < CALL1. */
+               PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS));
+       }
+       return jump;
+}
+
+#define RESOLVE_IMM1() \
+       if (src1 & SLJIT_IMM) { \
+               if (src1w) { \
+                       PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \
+                       src1 = TMP_REG1; \
+               } \
+               else \
+                       src1 = 0; \
+       }
+
+#define RESOLVE_IMM2() \
+       if (src2 & SLJIT_IMM) { \
+               if (src2w) { \
+                       PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \
+                       src2 = TMP_REG2; \
+               } \
+               else \
+                       src2 = 0; \
+       }
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       struct sljit_jump *jump;
+       sljit_si flags;
+       sljit_ins inst;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA;
+       if (src1 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w));
+               src1 = TMP_REG1;
+       }
+       if (src2 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0));
+               src2 = TMP_REG2;
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
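+       /* Equality tests map directly onto BEQ/BNE; other comparisons use the branch-on-zero forms or are synthesized with SLT below. */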
+       if (type <= SLJIT_NOT_EQUAL) {
+               RESOLVE_IMM1();
+               RESOLVE_IMM2();
+               jump->flags |= IS_BIT26_COND;
+               if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2)))
+                       jump->flags |= IS_MOVABLE;
+               PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS));
+       }
+       else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) {
+               inst = NOP;
+               if ((src1 & SLJIT_IMM) && (src1w == 0)) {
+                       RESOLVE_IMM2();
+                       switch (type) {
+                       case SLJIT_SIG_LESS:
+                               inst = BLEZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       case SLJIT_SIG_GREATER_EQUAL:
+                               inst = BGTZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       case SLJIT_SIG_GREATER:
+                               inst = BGEZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       case SLJIT_SIG_LESS_EQUAL:
+                               inst = BLTZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       }
+                       src1 = src2;
+               }
+               else {
+                       RESOLVE_IMM1();
+                       switch (type) {
+                       case SLJIT_SIG_LESS:
+                               inst = BGEZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       case SLJIT_SIG_GREATER_EQUAL:
+                               inst = BLTZ;
+                               jump->flags |= IS_BIT16_COND;
+                               break;
+                       case SLJIT_SIG_GREATER:
+                               inst = BLEZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       case SLJIT_SIG_LESS_EQUAL:
+                               inst = BGTZ;
+                               jump->flags |= IS_BIT26_COND;
+                               break;
+                       }
+               }
+               PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS));
+       }
+       else {
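+               /* General case: materialize the comparison into TMP_REG1 with SLT/SLTU (or their immediate forms) and branch on the result. */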
+               if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) {
+                       RESOLVE_IMM1();
+                       if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN)
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1)));
+                       else {
+                               RESOLVE_IMM2();
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1)));
+                       }
+                       type = (type == SLJIT_LESS || type == SLJIT_SIG_LESS) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
+               }
+               else {
+                       RESOLVE_IMM2();
+                       if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN)
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1)));
+                       else {
+                               RESOLVE_IMM1();
+                               PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1)));
+                       }
+                       type = (type == SLJIT_GREATER || type == SLJIT_SIG_GREATER) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
+               }
+
+               jump->flags |= IS_BIT26_COND;
+               PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS));
+       }
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       return jump;
+}
+
+#undef RESOLVE_IMM1
+#undef RESOLVE_IMM2
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       struct sljit_jump *jump;
+       sljit_ins inst;
+       sljit_si if_true;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w));
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       if (src1 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+               src1 = TMP_FREG1;
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+               src2 = TMP_FREG2;
+       }
+       else
+               src2 <<= 1;
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       jump->flags |= IS_BIT16_COND;
+
+       switch (type & 0xff) {
+       case SLJIT_D_EQUAL:
+               inst = C_UEQ_S;
+               if_true = 1;
+               break;
+       case SLJIT_D_NOT_EQUAL:
+               inst = C_UEQ_S;
+               if_true = 0;
+               break;
+       case SLJIT_D_LESS:
+               inst = C_ULT_S;
+               if_true = 1;
+               break;
+       case SLJIT_D_GREATER_EQUAL:
+               inst = C_ULT_S;
+               if_true = 0;
+               break;
+       case SLJIT_D_GREATER:
+               inst = C_ULE_S;
+               if_true = 0;
+               break;
+       case SLJIT_D_LESS_EQUAL:
+               inst = C_ULE_S;
+               if_true = 1;
+               break;
+       case SLJIT_D_UNORDERED:
+               inst = C_UN_S;
+               if_true = 1;
+               break;
+       default: /* Make compilers happy. */
+               SLJIT_ASSERT_STOP();
+       case SLJIT_D_ORDERED:
+               inst = C_UN_S;
+               if_true = 0;
+               break;
+       }
+
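+       /* The C.cond.fmt compare records its outcome in the FCSR condition bit. */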
+       PTR_FAIL_IF(push_inst(compiler, inst | FMT(type) | FT(src2) | FS(src1), UNMOVABLE_INS));
+       /* Intentionally the inverted opcode: the branch skips the jump when the tested condition does not hold. */
+       PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS));
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       return jump;
+}
+
+#undef JUMP_LENGTH
+#undef BR_Z
+#undef BR_NZ
+#undef BR_T
+#undef BR_F
+
+#undef FLOAT_DATA
+#undef FMT
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       sljit_si src_r = TMP_REG2;
+       struct sljit_jump *jump = NULL;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src)) {
+               if (DR(src) != 4)
+                       src_r = src;
+               else
+                       FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+       }
+
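+       /* Calls go through PIC_ADDR_REG ($25, t9), which the MIPS calling convention expects to hold the callee address. */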
+       if (type >= SLJIT_CALL0) {
+               SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+               if (src & (SLJIT_IMM | SLJIT_MEM)) {
+                       if (src & SLJIT_IMM)
+                               FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
+                       else {
+                               SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
+                               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+                       }
+                       FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+                       /* We need an extra instruction in any case. */
+                       return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS);
+               }
+
+               /* Register input. */
+               if (type >= SLJIT_CALL1)
+                       FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4));
+               FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+               return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS);
+       }
+
+       if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
+               jump->u.target = srcw;
+
+               if (compiler->delay_slot != UNMOVABLE_INS)
+                       jump->flags |= IS_MOVABLE;
+
+               FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       }
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+
+       FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS));
+       if (jump)
+               jump->addr = compiler->size;
+       FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si sugg_dst_ar, dst_ar;
+       sljit_si flags = GET_ALL_FLAGS(op);
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      define mem_type WORD_DATA
+#else
+       sljit_si mem_type = (op & SLJIT_INT_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+       if (op == SLJIT_MOV_SI || op == SLJIT_MOV_UI)
+               mem_type = INT_DATA | SIGNED_DATA;
+#endif
+       sugg_dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
+               ADJUST_LOCAL_OFFSET(src, srcw);
+               FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       switch (type & 0xff) {
+       case SLJIT_EQUAL:
+       case SLJIT_NOT_EQUAL:
+               FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+               break;
+       case SLJIT_LESS:
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_LESS:
+       case SLJIT_D_GREATER_EQUAL:
+               dst_ar = ULESS_FLAG;
+               break;
+       case SLJIT_GREATER:
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_GREATER:
+       case SLJIT_D_LESS_EQUAL:
+               dst_ar = UGREATER_FLAG;
+               break;
+       case SLJIT_SIG_LESS:
+       case SLJIT_SIG_GREATER_EQUAL:
+               dst_ar = LESS_FLAG;
+               break;
+       case SLJIT_SIG_GREATER:
+       case SLJIT_SIG_LESS_EQUAL:
+               dst_ar = GREATER_FLAG;
+               break;
+       case SLJIT_OVERFLOW:
+       case SLJIT_NOT_OVERFLOW:
+               dst_ar = OVERFLOW_FLAG;
+               break;
+       case SLJIT_MUL_OVERFLOW:
+       case SLJIT_MUL_NOT_OVERFLOW:
+               FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+               type ^= 0x1; /* Flip type bit for the XORI below. */
+               break;
+       case SLJIT_D_EQUAL:
+       case SLJIT_D_NOT_EQUAL:
+               dst_ar = EQUAL_FLAG;
+               break;
+
+       case SLJIT_D_UNORDERED:
+       case SLJIT_D_ORDERED:
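+               /* Read the FCSR and extract the floating-point condition bit (bit 23). */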
+               FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
+               FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar));
+               FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+               break;
+
+       default:
+               SLJIT_ASSERT_STOP();
+               dst_ar = sugg_dst_ar;
+               break;
+       }
+
+       if (type & 0x1) {
+               FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+               dst_ar = sugg_dst_ar;
+       }
+
+       if (op >= SLJIT_ADD) {
+               if (DR(TMP_REG2) != dst_ar)
+                       FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+               return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);
+
+       if (sugg_dst_ar != dst_ar)
+               return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar);
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#      undef mem_type
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+       return const_;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativePPC_32.c b/ext/pcre/pcrelib/sljit/sljitNativePPC_32.c
new file mode 100644 (file)
index 0000000..b14b75c
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ppc 32-bit arch-dependent functions. */
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       if (imm <= SIMM_MAX && imm >= SIMM_MIN)
+               return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
+
+       if (!(imm & ~0xffff))
+               return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm));
+
+       FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
+       return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
+}
+
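+/* Simplified mnemonics: clrlwi (rlwinm dst, src, 0, from, 31). */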
+#define INS_CLEAR_LEFT(dst, src, from) \
+       (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1))
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_si src2)
+{
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if (dst != src2)
+                       return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB)
+                               return push_inst(compiler, EXTSB | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
+               }
+               else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
+                       return push_inst(compiler, EXTSB | S(src2) | A(dst));
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH)
+                               return push_inst(compiler, EXTSH | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
+               }
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+
+       case SLJIT_NEG:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+
+       case SLJIT_ADD:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM4) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
+                       return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
+               return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_ADDC:
+               if (flags & ALT_FORM1) {
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_SUB:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & (ALT_FORM2 | ALT_FORM3)) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2)
+                               FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm));
+                       if (flags & ALT_FORM3)
+                               return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm);
+                       return SLJIT_SUCCESS;
+               }
+               if (flags & (ALT_FORM4 | ALT_FORM5)) {
+                       if (flags & ALT_FORM4)
+                               FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
+                       if (flags & ALT_FORM5)
+                               FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)));
+                       return SLJIT_SUCCESS;
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+               if (flags & ALT_FORM6)
+                       FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
+               return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_SUBC:
+               if (flags & ALT_FORM1) {
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_MUL:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
+               }
+               return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_AND:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm);
+               }
+               return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_OR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_XOR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_SHL:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       compiler->imm &= 0x1f;
+                       return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
+               }
+               return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_LSHR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       compiler->imm &= 0x1f;
+                       return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
+               }
+               return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_ASHR:
+               if (flags & ALT_FORM3)
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       compiler->imm &= 0x1f;
+                       FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
+               }
+               else
+                       FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)));
+               return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value)
+{
+       FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16)));
+       return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
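+       /* emit_const produced an ADDIS/ORI pair; patch the 16-bit immediate in each. */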
+       inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativePPC_64.c b/ext/pcre/pcrelib/sljit/sljitNativePPC_64.c
new file mode 100644 (file)
index 0000000..182ac7b
--- /dev/null
@@ -0,0 +1,421 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ppc 64-bit arch-dependent functions. */
+
+#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
+#define ASM_SLJIT_CLZ(src, dst) \
+       __asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) )
+#elif defined(__xlc__)
+#error "Please enable GCC syntax for inline assembly statements"
+#else
+#error "Must implement count leading zeroes"
+#endif
+
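+/* MD-form rotate (rldicl/rldicr): the 6-bit sh and mb fields are each split into 5+1 bit pieces by the encoding. */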
+#define RLDI(dst, src, sh, mb, type) \
+       (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
+
+#define PUSH_RLDICR(reg, shift) \
+       push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1))
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       sljit_uw tmp;
+       sljit_uw shift;
+       sljit_uw tmp2;
+       sljit_uw shift2;
+
+       if (imm <= SIMM_MAX && imm >= SIMM_MIN)
+               return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
+
+       if (!(imm & ~0xffff))
+               return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm));
+
+       if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
+               FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
+               return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
+       }
+
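+       /* Wider constants: shift the significant bits to the top, build them in 16-bit chunks, then rotate them into place with rldicr. */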
+       /* Count leading zeroes. */
+       tmp = (imm >= 0) ? imm : ~imm;
+       ASM_SLJIT_CLZ(tmp, shift);
+       SLJIT_ASSERT(shift > 0);
+       shift--;
+       tmp = (imm << shift);
+
+       if ((tmp & ~0xffff000000000000ul) == 0) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               shift += 15;
+               return PUSH_RLDICR(reg, shift);
+       }
+
+       if ((tmp & ~0xffffffff00000000ul) == 0) {
+               FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48)));
+               FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
+               shift += 31;
+               return PUSH_RLDICR(reg, shift);
+       }
+
+       /* Cut out the next 16 bits of the immediate. */
+       shift += 15;
+       tmp2 = imm & ((1ul << (63 - shift)) - 1);
+
+       if (tmp2 <= 0xffff) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               FAIL_IF(PUSH_RLDICR(reg, shift));
+               return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2);
+       }
+
+       if (tmp2 <= 0xffffffff) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               FAIL_IF(PUSH_RLDICR(reg, shift));
+               FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16)));
+               return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
+       }
+
+       ASM_SLJIT_CLZ(tmp2, shift2);
+       tmp2 <<= shift2;
+
+       if ((tmp2 & ~0xffff000000000000ul) == 0) {
+               FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+               shift2 += 15;
+               shift += (63 - shift2);
+               FAIL_IF(PUSH_RLDICR(reg, shift));
+               FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48)));
+               return PUSH_RLDICR(reg, shift2);
+       }
+
+       /* The general version. */
+       FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48)));
+       FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
+       FAIL_IF(PUSH_RLDICR(reg, 31));
+       FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
+       return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
+}
+
+/* Simplified mnemonics: clrldi. */
+#define INS_CLEAR_LEFT(dst, src, from) \
+       (RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5))
+
+/* Sign extension for integer operations. */
+#define UN_EXTS() \
+       if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \
+               FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \
+               src2 = TMP_REG2; \
+       }
+
+#define BIN_EXTS() \
+       if (flags & ALT_SIGN_EXT) { \
+               if (flags & REG1_SOURCE) { \
+                       FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \
+                       src1 = TMP_REG1; \
+               } \
+               if (flags & REG2_SOURCE) { \
+                       FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \
+                       src2 = TMP_REG2; \
+               } \
+       }
+
+#define BIN_IMM_EXTS() \
+       if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \
+               FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \
+               src1 = TMP_REG1; \
+       }
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_si src2)
+{
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if (dst != src2)
+                       return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SI)
+                               return push_inst(compiler, EXTSW | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
+               }
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB)
+                               return push_inst(compiler, EXTSB | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
+               }
+               else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
+                       return push_inst(compiler, EXTSB | S(src2) | A(dst));
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH)
+                               return push_inst(compiler, EXTSH | S(src2) | A(dst));
+                       return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
+               }
+               else {
+                       SLJIT_ASSERT(dst == src2);
+               }
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               UN_EXTS();
+               return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+
+       case SLJIT_NEG:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               UN_EXTS();
+               return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1);
+               if (flags & ALT_FORM1)
+                       return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+               return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
+
+       case SLJIT_ADD:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       BIN_IMM_EXTS();
+                       return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & ALT_FORM4) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
+                       return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
+               BIN_EXTS();
+               return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_ADDC:
+               if (flags & ALT_FORM1) {
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               BIN_EXTS();
+               return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
+
+       case SLJIT_SUB:
+               if (flags & ALT_FORM1) {
+                       /* Flags are not set: BIN_IMM_EXTS is unnecessary. */
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
+               }
+               if (flags & (ALT_FORM2 | ALT_FORM3)) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2)
+                               FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
+                       if (flags & ALT_FORM3)
+                               return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
+                       return SLJIT_SUCCESS;
+               }
+               if (flags & (ALT_FORM4 | ALT_FORM5)) {
+                       if (flags & ALT_FORM4)
+                               FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
+                       if (flags & ALT_FORM5)
+                               return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
+                       return SLJIT_SUCCESS;
+               }
+               if (!(flags & ALT_SET_FLAGS))
+                       return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+               BIN_EXTS();
+               if (flags & ALT_FORM6)
+                       FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
+               return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_SUBC:
+               if (flags & ALT_FORM1) {
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+                       FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
+                       return push_inst(compiler, MTXER | S(0));
+               }
+               BIN_EXTS();
+               return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_MUL:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
+               }
+               BIN_EXTS();
+               if (flags & ALT_FORM2)
+                       return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
+               return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1));
+
+       case SLJIT_AND:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm);
+               }
+               return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_OR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_XOR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM2) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm);
+               }
+               if (flags & ALT_FORM3) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
+                       return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
+               }
+               return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_SHL:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2) {
+                               compiler->imm &= 0x1f;
+                               return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
+                       }
+                       else {
+                               compiler->imm &= 0x3f;
+                               return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
+                       }
+               }
+               return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_LSHR:
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2) {
+                               compiler->imm &= 0x1f;
+                               return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
+                       }
+                       else {
+                               compiler->imm &= 0x3f;
+                               return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
+                       }
+               }
+               return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));
+
+       case SLJIT_ASHR:
+               if (flags & ALT_FORM3)
+                       FAIL_IF(push_inst(compiler, MFXER | D(0)));
+               if (flags & ALT_FORM1) {
+                       SLJIT_ASSERT(src2 == TMP_REG2);
+                       if (flags & ALT_FORM2) {
+                               compiler->imm &= 0x1f;
+                               FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
+                       }
+                       else {
+                               compiler->imm &= 0x3f;
+                               FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)));
+                       }
+               }
+               else
+                       FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)));
+               return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value)
+{
+       FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48)));
+       FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32)));
+       FAIL_IF(PUSH_RLDICR(reg, 31));
+       FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16)));
+       return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
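+       /* emit_const produced ADDIS/ORI/RLDICR/ORIS/ORI; inst[2], the rotate, needs no patching. */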
+       inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff);
+       inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
+       inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 5);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff);
+       inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff);
+       inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
+       inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff);
+       SLJIT_CACHE_FLUSH(inst, inst + 5);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c b/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c
new file mode 100644 (file)
index 0000000..08d5356
--- /dev/null
@@ -0,0 +1,2374 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "PowerPC" SLJIT_CPUINFO;
+}
+
+/* Length of an instruction word,
+   the same for both ppc-32 and ppc-64. */
+typedef sljit_ui sljit_ins;
+
+#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
+       || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_PPC_STACK_FRAME_V2 1
+#endif
+
+#ifdef _AIX
+#include <sys/cache.h>
+#endif
+
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
+#endif
+
+static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
+{
+#ifdef _AIX
+       _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
+#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
+#      if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
+       /* Cache flush for POWER architecture. */
+       while (from < to) {
+               __asm__ volatile (
+                       "clf 0, %0\n"
+                       "dcs\n"
+                       : : "r"(from)
+               );
+               from++;
+       }
+       __asm__ volatile ( "ics" );
+#      elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
+#      error "Cache flush is not implemented for PowerPC/POWER common mode."
+#      else
+       /* Cache flush for PowerPC architecture. */
+       while (from < to) {
+               __asm__ volatile (
+                       "dcbf 0, %0\n"
+                       "sync\n"
+                       "icbi 0, %0\n"
+                       : : "r"(from)
+               );
+               from++;
+       }
+       __asm__ volatile ( "isync" );
+#      endif
+#      ifdef __xlc__
+#      warning "This file may fail to compile if -qfuncsect is used"
+#      endif
+#elif defined(__xlc__)
+#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
+#else
+#error "This platform requires a cache flush implementation."
+#endif /* _AIX */
+}
+
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_ZERO       (SLJIT_NUMBER_OF_REGISTERS + 5)
+
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+#define TMP_CALL_REG   (SLJIT_NUMBER_OF_REGISTERS + 6)
+#else
+#define TMP_CALL_REG   TMP_REG2
+#endif
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+
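+/* SLJIT register index -> PowerPC GPR number; r0 appears only as the unused index 0, and r2, r11 and r13 are never used. */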
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
+       0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+#define D(d)           (reg_map[d] << 21)
+#define S(s)           (reg_map[s] << 21)
+#define A(a)           (reg_map[a] << 16)
+#define B(b)           (reg_map[b] << 11)
+#define C(c)           (reg_map[c] << 6)
+#define FD(fd)         ((fd) << 21)
+#define FS(fs)         ((fs) << 21)
+#define FA(fa)         ((fa) << 16)
+#define FB(fb)         ((fb) << 11)
+#define FC(fc)         ((fc) << 6)
+#define IMM(imm)       ((imm) & 0xffff)
+#define CRD(d)         ((d) << 21)
+
+/* Instruction bit sections.
+   OE and Rc flags (see ALT_SET_FLAGS). */
+#define OERC(flags)    (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
+/* Rc flag (see ALT_SET_FLAGS). */
+#define RC(flags)      ((flags & ALT_SET_FLAGS) >> 10)
+#define HI(opcode)     ((opcode) << 26)
+#define LO(opcode)     ((opcode) << 1)
+
+#define ADD            (HI(31) | LO(266))
+#define ADDC           (HI(31) | LO(10))
+#define ADDE           (HI(31) | LO(138))
+#define ADDI           (HI(14))
+#define ADDIC          (HI(13))
+#define ADDIS          (HI(15))
+#define ADDME          (HI(31) | LO(234))
+#define AND            (HI(31) | LO(28))
+#define ANDI           (HI(28))
+#define ANDIS          (HI(29))
+#define Bx             (HI(18))
+#define BCx            (HI(16))
+#define BCCTR          (HI(19) | LO(528) | (3 << 11))
+#define BLR            (HI(19) | LO(16) | (0x14 << 21))
+#define CNTLZD         (HI(31) | LO(58))
+#define CNTLZW         (HI(31) | LO(26))
+#define CMP            (HI(31) | LO(0))
+#define CMPI           (HI(11))
+#define CMPL           (HI(31) | LO(32))
+#define CMPLI          (HI(10))
+#define CROR           (HI(19) | LO(449))
+#define DIVD           (HI(31) | LO(489))
+#define DIVDU          (HI(31) | LO(457))
+#define DIVW           (HI(31) | LO(491))
+#define DIVWU          (HI(31) | LO(459))
+#define EXTSB          (HI(31) | LO(954))
+#define EXTSH          (HI(31) | LO(922))
+#define EXTSW          (HI(31) | LO(986))
+#define FABS           (HI(63) | LO(264))
+#define FADD           (HI(63) | LO(21))
+#define FADDS          (HI(59) | LO(21))
+#define FCFID          (HI(63) | LO(846))
+#define FCMPU          (HI(63) | LO(0))
+#define FCTIDZ         (HI(63) | LO(815))
+#define FCTIWZ         (HI(63) | LO(15))
+#define FDIV           (HI(63) | LO(18))
+#define FDIVS          (HI(59) | LO(18))
+#define FMR            (HI(63) | LO(72))
+#define FMUL           (HI(63) | LO(25))
+#define FMULS          (HI(59) | LO(25))
+#define FNEG           (HI(63) | LO(40))
+#define FRSP           (HI(63) | LO(12))
+#define FSUB           (HI(63) | LO(20))
+#define FSUBS          (HI(59) | LO(20))
+#define LD             (HI(58) | 0)
+#define LWZ            (HI(32))
+#define MFCR           (HI(31) | LO(19))
+#define MFLR           (HI(31) | LO(339) | 0x80000)
+#define MFXER          (HI(31) | LO(339) | 0x10000)
+#define MTCTR          (HI(31) | LO(467) | 0x90000)
+#define MTLR           (HI(31) | LO(467) | 0x80000)
+#define MTXER          (HI(31) | LO(467) | 0x10000)
+#define MULHD          (HI(31) | LO(73))
+#define MULHDU         (HI(31) | LO(9))
+#define MULHW          (HI(31) | LO(75))
+#define MULHWU         (HI(31) | LO(11))
+#define MULLD          (HI(31) | LO(233))
+#define MULLI          (HI(7))
+#define MULLW          (HI(31) | LO(235))
+#define NEG            (HI(31) | LO(104))
+#define NOP            (HI(24))
+#define NOR            (HI(31) | LO(124))
+#define OR             (HI(31) | LO(444))
+#define ORI            (HI(24))
+#define ORIS           (HI(25))
+#define RLDICL         (HI(30))
+#define RLWINM         (HI(21))
+#define SLD            (HI(31) | LO(27))
+#define SLW            (HI(31) | LO(24))
+#define SRAD           (HI(31) | LO(794))
+#define SRADI          (HI(31) | LO(413 << 1))
+#define SRAW           (HI(31) | LO(792))
+#define SRAWI          (HI(31) | LO(824))
+#define SRD            (HI(31) | LO(539))
+#define SRW            (HI(31) | LO(536))
+#define STD            (HI(62) | 0)
+#define STDU           (HI(62) | 1)
+#define STDUX          (HI(31) | LO(181))
+#define STFIWX         (HI(31) | LO(983))
+#define STW            (HI(36))
+#define STWU           (HI(37))
+#define STWUX          (HI(31) | LO(183))
+#define SUBF           (HI(31) | LO(40))
+#define SUBFC          (HI(31) | LO(8))
+#define SUBFE          (HI(31) | LO(136))
+#define SUBFIC         (HI(8))
+#define XOR            (HI(31) | LO(316))
+#define XORI           (HI(26))
+#define XORIS          (HI(27))
+
+#define SIMM_MAX       (0x7fff)
+#define SIMM_MIN       (-0x8000)
+#define UIMM_MAX       (0xffff)
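+/* Immediates outside these 16-bit ranges cannot be encoded in a single
+   D-form instruction; such values go through load_immediate() (defined in
+   the included sljitNativePPC_32/64.c) or an addis-based two-instruction
+   sequence. */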
+
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
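+/* Sketch of the intent: on ABIs where SLJIT_INDIRECT_CALL is set (e.g.
+   64-bit ELFv1 / AIX), a function pointer designates a descriptor of the
+   form { entry address, TOC (r2), environment (r11) } rather than code.
+   The context filled below imitates such a descriptor for generated code,
+   copying r2/r11 from an existing function. */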
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
+{
+       sljit_sw* ptrs;
+       if (func_ptr)
+               *func_ptr = (void*)context;
+       ptrs = (sljit_sw*)func;
+       context->addr = addr ? addr : ptrs[0];
+       context->r2 = ptrs[1];
+       context->r11 = ptrs[2];
+}
+#endif
+
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       return SLJIT_SUCCESS;
+}
+
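+/* Reachability sketch for the checks below: a conditional bc carries a
+   16-bit displacement (+/-32 KB), an unconditional b a 26-bit one
+   (+/-32 MB).  A conditional jump whose target is out of bc range is
+   marked REMOVE_COND and later rewritten as an inverted bc over an
+   unconditional b, hence the extra sizeof(sljit_ins) subtracted from
+   the distance. */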
+static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_sw extra_jump_flags;
+
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
+               return 0;
+#else
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return 0;
+#endif
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       if (jump->flags & IS_CALL)
+               goto keep_address;
+#endif
+
+       diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
+
+       extra_jump_flags = 0;
+       if (jump->flags & IS_COND) {
+               if (diff <= 0x7fff && diff >= -0x8000) {
+                       jump->flags |= PATCH_B;
+                       return 1;
+               }
+               if (target_addr <= 0xffff) {
+                       jump->flags |= PATCH_B | PATCH_ABS_B;
+                       return 1;
+               }
+               extra_jump_flags = REMOVE_COND;
+
+               diff -= sizeof(sljit_ins);
+       }
+
+       if (diff <= 0x01ffffff && diff >= -0x02000000) {
+               jump->flags |= PATCH_B | extra_jump_flags;
+               return 1;
+       }
+       if (target_addr <= 0x03ffffff) {
+               jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
+               return 1;
+       }
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+keep_address:
+#endif
+       if (target_addr <= 0x7fffffff) {
+               jump->flags |= PATCH_ABS32;
+               return 1;
+       }
+       if (target_addr <= 0x7fffffffffffl) {
+               jump->flags |= PATCH_ABS48;
+               return 1;
+       }
+#endif
+
+       return 0;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_generate_code(compiler));
+       reverse_buf(compiler);
+
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
+#else
+       compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
+#endif
+#endif
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               label->addr = (sljit_uw)code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                               jump->addr = (sljit_uw)(code_ptr - 3);
+#else
+                               jump->addr = (sljit_uw)(code_ptr - 6);
+#endif
+                               if (detect_jump_type(jump, code_ptr, code)) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                                       code_ptr[-3] = code_ptr[0];
+                                       code_ptr -= 3;
+#else
+                                       if (jump->flags & PATCH_ABS32) {
+                                               code_ptr -= 3;
+                                               code_ptr[-1] = code_ptr[2];
+                                               code_ptr[0] = code_ptr[3];
+                                       }
+                                       else if (jump->flags & PATCH_ABS48) {
+                                               code_ptr--;
+                                               code_ptr[-1] = code_ptr[0];
+                                               code_ptr[0] = code_ptr[1];
+                                               /* rldicr rX,rX,32,31 -> rX,rX,16,47 */
+                                               SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
+                                               code_ptr[-3] ^= 0x8422;
+                                               /* oris -> ori */
+                                               code_ptr[-2] ^= 0x4000000;
+                                       }
+                                       else {
+                                               code_ptr[-6] = code_ptr[0];
+                                               code_ptr -= 6;
+                                       }
+#endif
+                                       if (jump->flags & REMOVE_COND) {
+                                               code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
+                                               code_ptr++;
+                                               jump->addr += sizeof(sljit_ins);
+                                               code_ptr[0] = Bx;
+                                               jump->flags -= IS_COND;
+                                       }
+                               }
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr++;
+                       word_count++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
+#else
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+#endif
+
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+                       if (jump->flags & PATCH_B) {
+                               if (jump->flags & IS_COND) {
+                                       if (!(jump->flags & PATCH_ABS_B)) {
+                                               addr = addr - jump->addr;
+                                               SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
+                                               *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
+                                       }
+                                       else {
+                                               SLJIT_ASSERT(addr <= 0xffff);
+                                               *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
+                                       }
+                               }
+                               else {
+                                       if (!(jump->flags & PATCH_ABS_B)) {
+                                               addr = addr - jump->addr;
+                                               SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
+                                               *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
+                                       }
+                                       else {
+                                               SLJIT_ASSERT(addr <= 0x03ffffff);
+                                               *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
+                                       }
+                               }
+                               break;
+                       }
+                       /* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                       buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+#else
+                       if (jump->flags & PATCH_ABS32) {
+                               SLJIT_ASSERT(addr <= 0x7fffffff);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+                               break;
+                       }
+                       if (jump->flags & PATCH_ABS48) {
+                               SLJIT_ASSERT(addr <= 0x7fffffffffff);
+                               buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                               buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                               buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
+                               break;
+                       }
+                       buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
+                       buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
+                       buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
+#endif
+               } while (0);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       if (((sljit_sw)code_ptr) & 0x4)
+               code_ptr++;
+       sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
+       return code_ptr;
+#else
+       sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
+       return code_ptr;
+#endif
+#else
+       return code;
+#endif
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* inp_flags: */
+
+/* Creates an index into the data_transfer_insts array. */
+#define LOAD_DATA      0x01
+#define INDEXED                0x02
+#define WRITE_BACK     0x04
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x08
+#define HALF_DATA      0x10
+#define INT_DATA       0x18
+#define SIGNED_DATA    0x20
+/* Separates integer and floating point registers. */
+#define GPR_REG                0x3f
+#define DOUBLE_DATA    0x40
+
+#define MEM_MASK       0x7f
+
+/* Other inp_flags. */
+
+#define ARG_TEST       0x000100
+/* Integer operation and set flags -> requires exts on 64-bit systems. */
+#define ALT_SIGN_EXT   0x000200
+/* This flag affects the RC() and OERC() macros. */
+#define ALT_SET_FLAGS  0x000400
+#define ALT_KEEP_CACHE 0x000800
+#define ALT_FORM1      0x010000
+#define ALT_FORM2      0x020000
+#define ALT_FORM3      0x040000
+#define ALT_FORM4      0x080000
+#define ALT_FORM5      0x100000
+#define ALT_FORM6      0x200000
+
+/* Source and destination are registers. */
+#define REG_DEST       0x000001
+#define REG1_SOURCE    0x000002
+#define REG2_SOURCE    0x000004
+/* getput_arg_fast returned true. */
+#define FAST_DEST      0x000008
+/* Multiple instructions are required. */
+#define SLOW_DEST      0x000010
+/*
+ALT_SIGN_EXT           0x000200
+ALT_SET_FLAGS          0x000400
+ALT_FORM1              0x010000
+...
+ALT_FORM6              0x200000 */
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#include "sljitNativePPC_32.c"
+#else
+#include "sljitNativePPC_64.c"
+#endif
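+/* The file included above supplies the width-specific helpers used in the
+   rest of this file, notably load_immediate() and emit_single_op(). */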
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define STACK_STORE    STW
+#define STACK_LOAD     LWZ
+#else
+#define STACK_STORE    STD
+#define STACK_LOAD     LD
+#endif
+
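+/* Prologue sketch: save LR via mflr r0, spill TMP_ZERO and the saved and
+   scratch registers below the stack pointer, store r0 into the ABI link
+   register slot, zero TMP_ZERO, copy the incoming arguments into S0-S2,
+   then allocate the 16-byte aligned frame with a single stwu/stdu (or the
+   indexed stwux/stdux form when local_size does not fit SIMM_MAX). */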
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si i, tmp, offs;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       FAIL_IF(push_inst(compiler, MFLR | D(0)));
+       offs = -(sljit_si)(sizeof(sljit_sw));
+       FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
+
+       tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--) {
+               offs -= (sljit_si)(sizeof(sljit_sw));
+               FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
+       }
+
+       for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+               offs -= (sljit_si)(sizeof(sljit_sw));
+               FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
+       }
+
+       SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
+
+#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
+       FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
+#else
+       FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
+#endif
+
+       FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
+       if (args >= 1)
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
+       if (args >= 2)
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
+       if (args >= 3)
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
+
+       local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
+       local_size = (local_size + 15) & ~0xf;
+       compiler->local_size = local_size;
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       if (local_size <= SIMM_MAX)
+               FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
+       else {
+               FAIL_IF(load_immediate(compiler, 0, -local_size));
+               FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
+       }
+#else
+       if (local_size <= SIMM_MAX)
+               FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
+       else {
+               FAIL_IF(load_immediate(compiler, 0, -local_size));
+               FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
+       }
+#endif
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
+       compiler->local_size = (local_size + 15) & ~0xf;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si i, tmp, offs;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       if (compiler->local_size <= SIMM_MAX)
+               FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
+       else {
+               FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
+               FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
+       }
+
+#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
+       FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
+#else
+       FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
+#endif
+
+       offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
+
+       tmp = compiler->scratches;
+       for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
+               FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
+               offs += (sljit_si)(sizeof(sljit_sw));
+       }
+
+       tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = tmp; i <= SLJIT_S0; i++) {
+               FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
+               offs += (sljit_si)(sizeof(sljit_sw));
+       }
+
+       FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
+       SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
+
+       FAIL_IF(push_inst(compiler, MTLR | S(0)));
+       FAIL_IF(push_inst(compiler, BLR));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef STACK_STORE
+#undef STACK_LOAD
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* i/x - immediate/indexed form
+   n/w - no write-back / write-back (1 bit)
+   s/l - store/load (1 bit)
+   u/s - unsigned/signed (1 bit)
+   w/b/h/i - word/byte/half/int allowed (2 bits)
+   It contains 32 items, but not all are different. */
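+/* Illustrative index computation: inp_flags = BYTE_DATA | SIGNED_DATA |
+   LOAD_DATA gives 0x29 = 41, the "s b n i l" slot (lbz); the sign extension
+   lbz itself lacks is performed separately, as the EXTS_REQ notes below
+   indicate. */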
+
+/* 64-bit only: [reg+imm] must be aligned to 4 bytes. */
+#define INT_ALIGNED    0x10000
+/* 64-bit only: there is no lwau instruction. */
+#define UPDATE_REQ     0x20000
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define ARCH_32_64(a, b)       a
+#define INST_CODE_AND_DST(inst, flags, reg) \
+       ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
+#else
+#define ARCH_32_64(a, b)       b
+#define INST_CODE_AND_DST(inst, flags, reg) \
+       (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
+#endif
+
+static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = {
+
+/* -------- Unsigned -------- */
+
+/* Word. */
+
+/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
+/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
+/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+
+/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
+/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
+/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+
+/* Byte. */
+
+/* u b n i s */ HI(38) /* stb */,
+/* u b n i l */ HI(34) /* lbz */,
+/* u b n x s */ HI(31) | LO(215) /* stbx */,
+/* u b n x l */ HI(31) | LO(87) /* lbzx */,
+
+/* u b w i s */ HI(39) /* stbu */,
+/* u b w i l */ HI(35) /* lbzu */,
+/* u b w x s */ HI(31) | LO(247) /* stbux */,
+/* u b w x l */ HI(31) | LO(119) /* lbzux */,
+
+/* Half. */
+
+/* u h n i s */ HI(44) /* sth */,
+/* u h n i l */ HI(40) /* lhz */,
+/* u h n x s */ HI(31) | LO(407) /* sthx */,
+/* u h n x l */ HI(31) | LO(279) /* lhzx */,
+
+/* u h w i s */ HI(45) /* sthu */,
+/* u h w i l */ HI(41) /* lhzu */,
+/* u h w x s */ HI(31) | LO(439) /* sthux */,
+/* u h w x l */ HI(31) | LO(311) /* lhzux */,
+
+/* Int. */
+
+/* u i n i s */ HI(36) /* stw */,
+/* u i n i l */ HI(32) /* lwz */,
+/* u i n x s */ HI(31) | LO(151) /* stwx */,
+/* u i n x l */ HI(31) | LO(23) /* lwzx */,
+
+/* u i w i s */ HI(37) /* stwu */,
+/* u i w i l */ HI(33) /* lwzu */,
+/* u i w x s */ HI(31) | LO(183) /* stwux */,
+/* u i w x l */ HI(31) | LO(55) /* lwzux */,
+
+/* -------- Signed -------- */
+
+/* Word. */
+
+/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
+/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
+/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+
+/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
+/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
+/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+
+/* Byte. */
+
+/* s b n i s */ HI(38) /* stb */,
+/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
+/* s b n x s */ HI(31) | LO(215) /* stbx */,
+/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
+
+/* s b w i s */ HI(39) /* stbu */,
+/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
+/* s b w x s */ HI(31) | LO(247) /* stbux */,
+/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
+
+/* Half. */
+
+/* s h n i s */ HI(44) /* sth */,
+/* s h n i l */ HI(42) /* lha */,
+/* s h n x s */ HI(31) | LO(407) /* sthx */,
+/* s h n x l */ HI(31) | LO(343) /* lhax */,
+
+/* s h w i s */ HI(45) /* sthu */,
+/* s h w i l */ HI(43) /* lhau */,
+/* s h w x s */ HI(31) | LO(439) /* sthux */,
+/* s h w x l */ HI(31) | LO(375) /* lhaux */,
+
+/* Int. */
+
+/* s i n i s */ HI(36) /* stw */,
+/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
+/* s i n x s */ HI(31) | LO(151) /* stwx */,
+/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
+
+/* s i w i s */ HI(37) /* stwu */,
+/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
+/* s i w x s */ HI(31) | LO(183) /* stwux */,
+/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
+
+/* -------- Double -------- */
+
+/* d   n i s */ HI(54) /* stfd */,
+/* d   n i l */ HI(50) /* lfd */,
+/* d   n x s */ HI(31) | LO(727) /* stfdx */,
+/* d   n x l */ HI(31) | LO(599) /* lfdx */,
+
+/* s   n i s */ HI(52) /* stfs */,
+/* s   n i l */ HI(48) /* lfs */,
+/* s   n x s */ HI(31) | LO(663) /* stfsx */,
+/* s   n x l */ HI(31) | LO(535) /* lfsx */,
+
+};
+
+#undef ARCH_32_64
+
+/* Simple cases (no caching is required). */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       sljit_ins inst;
+
+       /* Should work when (arg & REG_MASK) == 0. */
+       SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (arg & OFFS_REG_MASK) {
+               if (argw & 0x3)
+                       return 0;
+               if (inp_flags & ARG_TEST)
+                       return 1;
+
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
+               return -1;
+       }
+
+       if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
+               inp_flags &= ~WRITE_BACK;
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       inst = data_transfer_insts[inp_flags & MEM_MASK];
+       SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
+
+       if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
+               return 0;
+       if (inp_flags & ARG_TEST)
+               return 1;
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       if (argw > SIMM_MAX || argw < SIMM_MIN)
+               return 0;
+       if (inp_flags & ARG_TEST)
+               return 1;
+
+       inst = data_transfer_insts[inp_flags & MEM_MASK];
+       SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+#endif
+
+       FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
+       return -1;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators, which always
+   use word arguments without write-back. */
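+/* high_short example (illustrative): for argw == 0x12f000, the expression
+   (argw + ((argw & 0x8000) << 1)) & ~0xffff used below yields 0x130000,
+   leaving a low part of -0x1000 that fits the signed 16-bit field; the
+   access then becomes addis(base, 0x13) followed by a load/store with
+   immediate -0x1000. */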
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_sw high_short, next_high_short;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       sljit_sw diff;
+#endif
+
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       if (arg & OFFS_REG_MASK)
+               return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
+
+       if (next_arg & OFFS_REG_MASK)
+               return 0;
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
+       next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
+       return high_short == next_high_short;
+#else
+       if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
+               high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
+               next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
+               if (high_short == next_high_short)
+                       return 1;
+       }
+
+       diff = argw - next_argw;
+       if (!(arg & REG_MASK))
+               return diff <= SIMM_MAX && diff >= SIMM_MIN;
+
+       if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
+               return 1;
+
+       return 0;
+#endif
+}
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define ADJUST_CACHED_IMM(imm) \
+       if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
+               /* Adjust the cached value. Fortunately this is really a rare case. */ \
+               compiler->cache_argw += imm & 0x3; \
+               FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
+               imm &= ~0x3; \
+       }
+#endif
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_r;
+       sljit_ins inst;
+       sljit_sw high_short, next_high_short;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       sljit_sw diff;
+#endif
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
+       /* Special case for "mov reg, [reg, ... ]". */
+       if ((arg & REG_MASK) == tmp_r)
+               tmp_r = TMP_REG1;
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3;
+               /* Otherwise getput_arg_fast would have handled it. */
+               SLJIT_ASSERT(argw);
+
+               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
+                       tmp_r = TMP_REG3;
+               else {
+                       if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
+                               compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                               compiler->cache_argw = argw;
+                               tmp_r = TMP_REG3;
+                       }
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                       FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
+#else
+                       FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
+#endif
+               }
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
+       }
+
+       if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
+               inp_flags &= ~WRITE_BACK;
+
+       inst = data_transfer_insts[inp_flags & MEM_MASK];
+       SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       if (argw <= 0x7fff7fffl && argw >= -0x80000000l
+                       && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
+#endif
+
+               arg &= REG_MASK;
+               high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
+               /* Otherwise getput_arg_fast should have handled this. */
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
+#else
+               SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
+#endif
+
+               if (inp_flags & WRITE_BACK) {
+                       if (arg == reg) {
+                               FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
+                               reg = tmp_r;
+                       }
+                       tmp_r = arg;
+                       FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
+               }
+               else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
+                       if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
+                               next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
+                               if (high_short == next_high_short) {
+                                       compiler->cache_arg = SLJIT_MEM | arg;
+                                       compiler->cache_argw = high_short;
+                                       tmp_r = TMP_REG3;
+                               }
+                       }
+                       FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
+               }
+               else
+                       tmp_r = TMP_REG3;
+
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       }
+
+       /* Everything else is PPC-64 only. */
+       if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
+               diff = argw - compiler->cache_argw;
+               if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       ADJUST_CACHED_IMM(diff);
+                       return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
+               }
+
+               diff = argw - next_argw;
+               if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+                       SLJIT_ASSERT(inp_flags & LOAD_DATA);
+
+                       compiler->cache_arg = SLJIT_IMM;
+                       compiler->cache_argw = argw;
+                       tmp_r = TMP_REG3;
+               }
+
+               FAIL_IF(load_immediate(compiler, tmp_r, argw));
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
+       }
+
+       diff = argw - compiler->cache_argw;
+       if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
+               ADJUST_CACHED_IMM(diff);
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
+       }
+
+       if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               if (compiler->cache_argw != argw) {
+                       FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
+                       compiler->cache_argw = argw;
+               }
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
+       }
+
+       if (argw == next_argw && (next_arg & SLJIT_MEM)) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+
+               inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+               SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
+       }
+
+       diff = argw - next_argw;
+       if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+               FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
+
+               compiler->cache_arg = arg;
+               compiler->cache_argw = argw;
+
+               return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
+       }
+
+       if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
+               SLJIT_ASSERT(inp_flags & LOAD_DATA);
+               FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+               compiler->cache_arg = SLJIT_IMM;
+               compiler->cache_argw = argw;
+               tmp_r = TMP_REG3;
+       }
+       else
+               FAIL_IF(load_immediate(compiler, tmp_r, argw));
+
+       /* Get the indexed version instead of the normal one. */
+       inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+       SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
+       return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
+#endif
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return compiler->error;
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* arg1 goes to TMP_REG1 or src reg.
+          arg2 goes to TMP_REG2, imm or src reg.
+          TMP_REG3 can be used for caching.
+          The result goes to TMP_REG2, so storing it may use TMP_REG1 and TMP_REG3. */
+       sljit_si dst_r;
+       sljit_si src1_r;
+       sljit_si src2_r;
+       sljit_si sugg_src2_r = TMP_REG2;
+       sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
+
+       if (!(input_flags & ALT_KEEP_CACHE)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       /* Destination check. */
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+                       return SLJIT_SUCCESS;
+               dst_r = TMP_REG2;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r;
+       }
+       else {
+               SLJIT_ASSERT(dst & SLJIT_MEM);
+               if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
+                       flags |= FAST_DEST;
+                       dst_r = TMP_REG2;
+               }
+               else {
+                       flags |= SLOW_DEST;
+                       dst_r = 0;
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1)) {
+               src1_r = src1;
+               flags |= REG1_SOURCE;
+       }
+       else if (src1 & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+               src1_r = TMP_REG1;
+       }
+       else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
+               FAIL_IF(compiler->error);
+               src1_r = TMP_REG1;
+       }
+       else
+               src1_r = 0;
+
+       /* Source 2. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       dst_r = src2_r;
+       }
+       else if (src2 & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
+               src2_r = sugg_src2_r;
+       }
+       else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
+               FAIL_IF(compiler->error);
+               src2_r = sugg_src2_r;
+       }
+       else
+               src2_r = 0;
+
+       /* src1_r, src2_r and dst_r can be zero (= unprocessed).
+          In that case all operands use complex addressing modes, and this is
+          a binary operator. */
+       if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+               }
+               src1_r = TMP_REG1;
+               src2_r = TMP_REG2;
+       }
+       else if (src1_r == 0 && src2_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+               src1_r = TMP_REG1;
+       }
+       else if (src1_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               src1_r = TMP_REG1;
+       }
+       else if (src2_r == 0 && dst_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+               src2_r = sugg_src2_r;
+       }
+
+       if (dst_r == 0)
+               dst_r = TMP_REG2;
+
+       if (src1_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
+               src1_r = TMP_REG1;
+       }
+
+       if (src2_r == 0) {
+               FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
+               src2_r = sugg_src2_r;
+       }
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       if (flags & (FAST_DEST | SLOW_DEST)) {
+               if (flags & FAST_DEST)
+                       FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
+               else
+                       FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
+       }
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       sljit_si int_op = op & SLJIT_INT_OP;
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op0(compiler, op));
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+       case SLJIT_NOP:
+               return push_inst(compiler, NOP);
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+               return push_inst(compiler, (op == SLJIT_LUMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
+#else
+               FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+               return push_inst(compiler, (op == SLJIT_LUMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
+#endif
+       case SLJIT_LUDIV:
+       case SLJIT_LSDIV:
+               FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (int_op) {
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+                       FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
+               } else {
+                       FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+                       FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
+               }
+               return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
+#else
+               FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+               FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
+               return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
+#endif
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+#define EMIT_MOV(type, type_flags, type_cast) \
+       emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
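+/* For immediate sources the macro above narrows srcw through type_cast and
+   demotes the operation to a plain SLJIT_MOV, since the truncation has
+   already been done at compile time. */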
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       op = GET_OPCODE(op);
+       if ((src & SLJIT_IMM) && srcw == 0)
+               src = TMP_ZERO;
+
+       if (op_flags & SLJIT_SET_O)
+               FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
+
+       if (op_flags & SLJIT_INT_OP) {
+               if (op < SLJIT_NOT) {
+                       if (FAST_IS_REG(src) && src == dst) {
+                               if (!TYPE_CAST_NEEDED(op))
+                                       return SLJIT_SUCCESS;
+                       }
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+                       if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
+                               op = SLJIT_MOV_UI;
+                       if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
+                               op = SLJIT_MOVU_UI;
+                       if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOV_SI;
+                       if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOVU_SI;
+#endif
+               }
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               else {
+                       /* Most operations expect sign extended arguments. */
+                       flags |= INT_DATA | SIGNED_DATA;
+                       if (src & SLJIT_IMM)
+                               srcw = (sljit_si)srcw;
+               }
+#endif
+       }
+
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+#endif
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       case SLJIT_MOV_UI:
+               return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui));
+
+       case SLJIT_MOV_SI:
+               return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si));
+#endif
+
+       case SLJIT_MOV_UB:
+               return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub));
+
+       case SLJIT_MOV_SB:
+               return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb));
+
+       case SLJIT_MOV_UH:
+               return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh));
+
+       case SLJIT_MOV_SH:
+               return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh));
+
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+       case SLJIT_MOVU_UI:
+       case SLJIT_MOVU_SI:
+#endif
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       case SLJIT_MOVU_UI:
+               return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui));
+
+       case SLJIT_MOVU_SI:
+               return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si));
+#endif
+
+       case SLJIT_MOVU_UB:
+               return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub));
+
+       case SLJIT_MOVU_SB:
+               return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb));
+
+       case SLJIT_MOVU_UH:
+               return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh));
+
+       case SLJIT_MOVU_SH:
+               return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh));
+
+       case SLJIT_NOT:
+               return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+               return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_CLZ:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
+#else
+               return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+#endif
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+#undef EMIT_MOV
+
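+/* Immediate classification used by sljit_emit_op2 (rough guide): SL - fits
+   the signed low 16-bit field (addi-style), SH - signed with zero low 16
+   bits (addis-style), UL/UH - unsigned and confined to the low/high 16 bits
+   (ori/oris-style), ADD/UI - wider values still reachable with a short
+   two-instruction sequence. */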
+#define TEST_SL_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
+
+#define TEST_UL_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_SH_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
+#else
+#define TEST_SH_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & 0xffff))
+#endif
+
+#define TEST_UH_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_ADD_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
+#else
+#define TEST_ADD_IMM(src, srcw) \
+       ((src) & SLJIT_IMM)
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_UI_IMM(src, srcw) \
+       (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
+#else
+#define TEST_UI_IMM(src, srcw) \
+       ((src) & SLJIT_IMM)
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       if ((src1 & SLJIT_IMM) && src1w == 0)
+               src1 = TMP_ZERO;
+       if ((src2 & SLJIT_IMM) && src2w == 0)
+               src2 = TMP_ZERO;
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       if (op & SLJIT_INT_OP) {
+               /* Most operations expect sign extended arguments. */
+               flags |= INT_DATA | SIGNED_DATA;
+               if (src1 & SLJIT_IMM)
+                       src1w = (sljit_si)(src1w);
+               if (src2 & SLJIT_IMM)
+                       src2w = (sljit_si)(src2w);
+               if (GET_FLAGS(op))
+                       flags |= ALT_SIGN_EXT;
+       }
+#endif
+       if (op & SLJIT_SET_O)
+               FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
+       if (src2 == TMP_REG2)
+               flags |= ALT_KEEP_CACHE;
+
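+       /* Immediate folding (sketch): the matching TEST_* macro stores the
+          folded constant in compiler->imm and selects an ALT_FORM* flag,
+          which tells emit_single_op which immediate encoding to use; SUB
+          reuses the ADD forms with a negated immediate where possible. */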
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+               if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+                       if (TEST_SL_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       if (TEST_SH_IMM(src2, src2w)) {
+                               compiler->imm = (src2w >> 16) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SH_IMM(src1, src1w)) {
+                               compiler->imm = (src1w >> 16) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       /* Range between -1 and -32768 is covered above. */
+                       if (TEST_ADD_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffffffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_ADD_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffffffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
+                       if (TEST_SL_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_ADDC:
+               return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB:
+               if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+                       if (TEST_SL_IMM(src2, -src2w)) {
+                               compiler->imm = (-src2w) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       if (TEST_SH_IMM(src2, -src2w)) {
+                               compiler->imm = ((-src2w) >> 16) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       /* The range between -1 and -32768 is covered above. */
+                       if (TEST_ADD_IMM(src2, -src2w)) {
+                               compiler->imm = -src2w & 0xffffffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+               }
+               if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
+                       if (!(op & SLJIT_SET_U)) {
+                               /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64-bit systems. */
+                               if (TEST_SL_IMM(src2, src2w)) {
+                                       compiler->imm = src2w & 0xffff;
+                                       return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                               }
+                               if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
+                                       compiler->imm = src1w & 0xffff;
+                                       return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                               }
+                       }
+                       if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
+                               /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64-bit systems. */
+                               if (TEST_UL_IMM(src2, src2w)) {
+                                       compiler->imm = src2w & 0xffff;
+                                       return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                               }
+                               return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
+                       }
+                       if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
+                               compiler->imm = src2w;
+                               return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
+               }
+               if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
+                       if (TEST_SL_IMM(src2, -src2w)) {
+                               compiler->imm = (-src2w) & 0xffff;
+                               return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+               }
+               /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64-bit systems. */
+               return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUBC:
+               return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_MUL:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (op & SLJIT_INT_OP)
+                       flags |= ALT_FORM2;
+#endif
+               if (!GET_FLAGS(op)) {
+                       if (TEST_SL_IMM(src2, src2w)) {
+                               compiler->imm = src2w & 0xffff;
+                               return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_SL_IMM(src1, src1w)) {
+                               compiler->imm = src1w & 0xffff;
+                               return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               /* Commutative unsigned operations. */
+               if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
+                       if (TEST_UL_IMM(src2, src2w)) {
+                               compiler->imm = src2w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_UL_IMM(src1, src1w)) {
+                               compiler->imm = src1w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+                       if (TEST_UH_IMM(src2, src2w)) {
+                               compiler->imm = (src2w >> 16) & 0xffff;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_UH_IMM(src1, src1w)) {
+                               compiler->imm = (src1w >> 16) & 0xffff;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
+                       if (TEST_UI_IMM(src2, src2w)) {
+                               compiler->imm = src2w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                       }
+                       if (TEST_UI_IMM(src1, src1w)) {
+                               compiler->imm = src1w;
+                               return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
+                       }
+               }
+               return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_ASHR:
+               if (op & SLJIT_KEEP_FLAGS)
+                       flags |= ALT_FORM3;
+               /* Fall through. */
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (op & SLJIT_INT_OP)
+                       flags |= ALT_FORM2;
+#endif
+               if (src2 & SLJIT_IMM) {
+                       compiler->imm = src2w;
+                       return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+               }
+               return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+       return push_inst(compiler, *(sljit_ins*)instruction);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#else
+       /* Available by default. */
+       return 1;
+#endif
+}
+
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6))
+#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
+#else
+#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
+
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
+#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
+#else
+#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
+#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
+#endif
+
+#endif /* SLJIT_CONFIG_PPC_64 */
+
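On 32-bit PPC a double spans two machine words in the temporary stack slot, and which word holds the high half depends on byte order; the _HI/_LOW offsets above are chosen accordingly. A small illustrative sketch, assuming IEEE-754 doubles, that prints the host's own layout:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
    double d = 1.0;      /* bit pattern 0x3ff0000000000000 */
    uint32_t w[2];
    memcpy(w, &d, sizeof d);
    /* Little-endian hosts put the high half (0x3ff00000) in w[1];
       big-endian hosts put it in w[0]. */
    printf("w[0]=%08x w[1]=%08x\n", w[0], w[1]);
    return 0;
}
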
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (src & SLJIT_MEM) {
+               /* We can ignore the temporary data store on the stack from a caching point of view. */
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+               src = TMP_FREG1;
+       }
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+       op = GET_OPCODE(op);
+       FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (op == SLJIT_CONVW_FROMD) {
+               if (FAST_IS_REG(dst)) {
+                       FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
+                       return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
+               }
+               return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+       }
+
+#else
+       FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+#endif
+
+       if (FAST_IS_REG(dst)) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
+               FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
+               return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
+       }
+
+       SLJIT_ASSERT(dst & SLJIT_MEM);
+
+       if (dst & OFFS_REG_MASK) {
+               dstw &= 0x3;
+               if (dstw) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+                       FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
+#else
+                       FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
+#endif
+                       dstw = TMP_REG1;
+               }
+               else
+                       dstw = OFFS_REG(dst);
+       }
+       else {
+               if ((dst & REG_MASK) && !dstw) {
+                       dstw = dst & REG_MASK;
+                       dst = 0;
+               }
+               else {
+                       /* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
+                       FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
+                       dstw = TMP_REG1;
+               }
+       }
+
+       return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+
+       sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (src & SLJIT_IMM) {
+               if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+                       srcw = (sljit_si)srcw;
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+       }
+       else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) {
+               if (FAST_IS_REG(src))
+                       FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
+               else
+                       FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+               src = TMP_REG1;
+       }
+
+       if (FAST_IS_REG(src)) {
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+               FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
+       }
+       else
+               FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+
+       FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+       if (op & SLJIT_SINGLE_OP)
+               return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
+       return SLJIT_SUCCESS;
+
+#else
+
+       sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+       sljit_si invert_sign = 1;
+
+       if (src & SLJIT_IMM) {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
+               src = TMP_REG1;
+               invert_sign = 0;
+       }
+       else if (!FAST_IS_REG(src)) {
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
+               src = TMP_REG1;
+       }
+
+       /* First, a special double precision value is constructed: 2^52 + (input xor 2^31).
+          (The high word 0x43300000 gives an unbiased exponent of 52.) Doubles carry exactly
+          53 bits of precision, so the lower 32 bits of this value hold the 32-bit payload
+          exactly. The xor with 2^31 is the same as adding 0x80000000 to the input, which
+          shifts it into the 0 - 0xffffffff range. To get the converted floating point
+          value, we subtract 2^52 + 2^31 from the constructed value. */
+       FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
+       if (invert_sign)
+               FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
+       FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+       FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
+       FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
+       FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
+       FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+       FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
+
+       FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+       if (op & SLJIT_SINGLE_OP)
+               return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
+       return SLJIT_SUCCESS;
+
+#endif
+}
+
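The 32-bit path above builds these doubles with ADDIS/XORIS and stack stores; the same bit trick can be sketched on a host with a union. Illustrative only, assuming IEEE-754 doubles:

#include <stdio.h>
#include <stdint.h>

static double int_to_double_via_bits(int32_t x)
{
    union { uint64_t u; double d; } a, b;
    /* 0x4330000000000000 is 2^52; its low 32 bits carry a payload exactly. */
    a.u = 0x4330000000000000ULL | ((uint32_t)x ^ 0x80000000u);
    b.u = 0x4330000000000000ULL | 0x80000000u;    /* 2^52 + 2^31 */
    return a.d - b.d;                             /* exact for any int32_t */
}

int main(void)
{
    printf("%f %f %f\n", int_to_double_via_bits(0),
           int_to_double_via_bits(-12345), int_to_double_via_bits(2147483647));
    return 0;
}
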
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+               src1 = TMP_FREG1;
+       }
+
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+               src2 = TMP_FREG2;
+       }
+
+       return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+       if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+               op ^= SLJIT_SINGLE_OP;
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+               src = dst_r;
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_CONVD_FROMS:
+               op ^= SLJIT_SINGLE_OP;
+               if (op & SLJIT_SINGLE_OP) {
+                       FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
+                       break;
+               }
+               /* Fall through. */
+       case SLJIT_DMOV:
+               if (src != dst_r) {
+                       if (dst_r != TMP_FREG1)
+                               FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
+                       else
+                               dst_r = src;
+               }
+               break;
+       case SLJIT_DNEG:
+               FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
+               break;
+       case SLJIT_DABS:
+               FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
+               break;
+       }
+
+       if (dst & SLJIT_MEM)
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, flags = 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+                       FAIL_IF(compiler->error);
+                       src1 = TMP_FREG1;
+               } else
+                       flags |= ALT_FORM1;
+       }
+
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+                       FAIL_IF(compiler->error);
+                       src2 = TMP_FREG2;
+               } else
+                       flags |= ALT_FORM2;
+       }
+
+       if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & ALT_FORM1)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+       else if (flags & ALT_FORM2)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+       if (flags & ALT_FORM1)
+               src1 = TMP_FREG1;
+       if (flags & ALT_FORM2)
+               src2 = TMP_FREG2;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DADD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
+               break;
+
+       case SLJIT_DSUB:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
+               break;
+
+       case SLJIT_DMUL:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL uses FC as src2 */));
+               break;
+
+       case SLJIT_DDIV:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
+               break;
+       }
+
+       if (dst_r == TMP_FREG2)
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef FLOAT_DATA
+#undef SELECT_FOP
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, MFLR | D(dst));
+
+       /* Memory. */
+       FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
+       return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, MTLR | S(src)));
+       else {
+               if (src & SLJIT_MEM)
+                       FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+               else if (src & SLJIT_IMM)
+                       FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+               FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
+       }
+       return push_inst(compiler, BLR);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_label(compiler));
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       return label;
+}
+
+static sljit_ins get_bo_bi_flags(sljit_si type)
+{
+       switch (type) {
+       case SLJIT_EQUAL:
+               return (12 << 21) | (2 << 16);
+
+       case SLJIT_NOT_EQUAL:
+               return (4 << 21) | (2 << 16);
+
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
+               return (12 << 21) | ((4 + 0) << 16);
+
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
+               return (4 << 21) | ((4 + 0) << 16);
+
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
+               return (12 << 21) | ((4 + 1) << 16);
+
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               return (4 << 21) | ((4 + 1) << 16);
+
+       case SLJIT_SIG_LESS:
+               return (12 << 21) | (0 << 16);
+
+       case SLJIT_SIG_GREATER_EQUAL:
+               return (4 << 21) | (0 << 16);
+
+       case SLJIT_SIG_GREATER:
+               return (12 << 21) | (1 << 16);
+
+       case SLJIT_SIG_LESS_EQUAL:
+               return (4 << 21) | (1 << 16);
+
+       case SLJIT_OVERFLOW:
+       case SLJIT_MUL_OVERFLOW:
+               return (12 << 21) | (3 << 16);
+
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_MUL_NOT_OVERFLOW:
+               return (4 << 21) | (3 << 16);
+
+       case SLJIT_D_EQUAL:
+               return (12 << 21) | ((4 + 2) << 16);
+
+       case SLJIT_D_NOT_EQUAL:
+               return (4 << 21) | ((4 + 2) << 16);
+
+       case SLJIT_D_UNORDERED:
+               return (12 << 21) | ((4 + 3) << 16);
+
+       case SLJIT_D_ORDERED:
+               return (4 << 21) | ((4 + 3) << 16);
+
+       default:
+               SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
+               return (20 << 21);
+       }
+}
+
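Each case above packs the PowerPC BO field (bits 21-25; 12 = branch if the selected CR bit is set, 4 = branch if it is clear, 20 = branch always) with a BI field (bits 16-20, the CR bit to test; the 4 + n values appear to select the CR field that the FCMPU in sljit_emit_fop1_cmp() above targets). A short decoding sketch, illustrative only:

#include <stdio.h>

static void decode_bo_bi(unsigned flags)
{
    unsigned bo = (flags >> 21) & 0x1f;
    unsigned bi = (flags >> 16) & 0x1f;
    const char *cond = (bo == 12) ? "if CR bit set"
                     : (bo == 4)  ? "if CR bit clear"
                     : (bo == 20) ? "always" : "?";
    printf("BO=%u BI=%u -> branch %s\n", bo, bi, cond);
}

int main(void)
{
    decode_bo_bi((12u << 21) | (2u << 16));       /* SLJIT_EQUAL         */
    decode_bo_bi((4u << 21) | ((4 + 0) << 16));   /* SLJIT_GREATER_EQUAL */
    decode_bo_bi(20u << 21);                      /* unconditional       */
    return 0;
}
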
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins bo_bi_flags;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+       bo_bi_flags = get_bo_bi_flags(type & 0xff);
+       if (!bo_bi_flags)
+               return NULL;
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       /* On PPC, we don't need to touch the arguments. */
+       if (type < SLJIT_JUMP)
+               jump->flags |= IS_COND;
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+       if (type >= SLJIT_CALL0)
+               jump->flags |= IS_CALL;
+#endif
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
+       PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
+       return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump = NULL;
+       sljit_si src_r;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src)) {
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+               if (type >= SLJIT_CALL0) {
+                       FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
+                       src_r = TMP_CALL_REG;
+               }
+               else
+                       src_r = src;
+#else
+               src_r = src;
+#endif
+       } else if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR);
+               jump->u.target = srcw;
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+               if (type >= SLJIT_CALL0)
+                       jump->flags |= IS_CALL;
+#endif
+               FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
+               src_r = TMP_CALL_REG;
+       }
+       else {
+               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
+               src_r = TMP_CALL_REG;
+       }
+
+       FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
+       if (jump)
+               jump->addr = compiler->size;
+       return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
+}
+
+/* Get a bit from CR, all other bits are zeroed. */
+#define GET_CR_BIT(bit, dst) \
+       FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
+       FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
+
+#define INVERT_BIT(dst) \
+       FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
+
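GET_CR_BIT works in two steps: mfcr copies the whole 32-bit condition register into a GPR, then rlwinm rotates left by (1 + bit) — moving CR bit 'bit' (numbered from the most significant end) into the least significant position — and masks with MB = ME = 31 so only that bit survives. A C model of the rotate-and-mask, illustrative only:

#include <stdio.h>
#include <stdint.h>

/* 'bit' is numbered from the MSB, as in the CR (bit 2 = CR0[EQ]).
   Valid for bit 0..30, which covers every use below. */
static uint32_t get_cr_bit(uint32_t cr, unsigned bit)
{
    unsigned sh = 1 + bit;                         /* rotate amount */
    uint32_t rot = (cr << sh) | (cr >> (32 - sh)); /* rotlw */
    return rot & 1;                                /* MB=ME=31: keep LSB */
}

int main(void)
{
    uint32_t cr = 1u << (31 - 2);   /* set CR bit 2 (CR0[EQ]) */
    printf("EQ=%u LT=%u\n", get_cr_bit(cr, 2), get_cr_bit(cr, 0));
    return 0;
}
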
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si reg, input_flags;
+       sljit_si flags = GET_ALL_FLAGS(op);
+       sljit_sw original_dstw = dstw;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
+               ADJUST_LOCAL_OFFSET(src, srcw);
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA;
+#else
+               input_flags = WORD_DATA;
+#endif
+               FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       switch (type & 0xff) {
+       case SLJIT_EQUAL:
+               GET_CR_BIT(2, reg);
+               break;
+
+       case SLJIT_NOT_EQUAL:
+               GET_CR_BIT(2, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
+               GET_CR_BIT(4 + 0, reg);
+               break;
+
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
+               GET_CR_BIT(4 + 0, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
+               GET_CR_BIT(4 + 1, reg);
+               break;
+
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               GET_CR_BIT(4 + 1, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_SIG_LESS:
+               GET_CR_BIT(0, reg);
+               break;
+
+       case SLJIT_SIG_GREATER_EQUAL:
+               GET_CR_BIT(0, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_SIG_GREATER:
+               GET_CR_BIT(1, reg);
+               break;
+
+       case SLJIT_SIG_LESS_EQUAL:
+               GET_CR_BIT(1, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_OVERFLOW:
+       case SLJIT_MUL_OVERFLOW:
+               GET_CR_BIT(3, reg);
+               break;
+
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_MUL_NOT_OVERFLOW:
+               GET_CR_BIT(3, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_D_EQUAL:
+               GET_CR_BIT(4 + 2, reg);
+               break;
+
+       case SLJIT_D_NOT_EQUAL:
+               GET_CR_BIT(4 + 2, reg);
+               INVERT_BIT(reg);
+               break;
+
+       case SLJIT_D_UNORDERED:
+               GET_CR_BIT(4 + 3, reg);
+               break;
+
+       case SLJIT_D_ORDERED:
+               GET_CR_BIT(4 + 3, reg);
+               INVERT_BIT(reg);
+               break;
+
+       default:
+               SLJIT_ASSERT_STOP();
+               break;
+       }
+
+       if (op < SLJIT_ADD) {
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+               if (op == SLJIT_MOV)
+                       input_flags = WORD_DATA;
+               else {
+                       op = SLJIT_MOV_UI;
+                       input_flags = INT_DATA;
+               }
+#else
+               op = SLJIT_MOV;
+               input_flags = WORD_DATA;
+#endif
+               if (reg != TMP_REG2)
+                       return SLJIT_SUCCESS;
+               return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+       }
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+       return const_;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeSPARC_32.c b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_32.c
new file mode 100644 (file)
index 0000000..4a2e629
--- /dev/null
+++ b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_32.c
@@ -0,0 +1,164 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw imm)
+{
+       if (imm <= SIMM_MAX && imm >= SIMM_MIN)
+               return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst));
+
+       FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst)));
+       return (imm & 0x3ff) ? push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS;
+}
+
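Immediates within the signed 13-bit range go out as a single or-with-%g0; anything wider is split into SETHI for the top 22 bits plus an optional OR for the low 10. A sketch of the split — illustrative only, printing assembly-like text rather than encoded words:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    int32_t imm = 0x12345678;
    if (imm >= -0x1000 && imm <= 0x0fff) {          /* SIMM_MIN..SIMM_MAX */
        printf("or    %%g0, %d, dst\n", imm);
    } else {
        printf("sethi %%hi(0x%x), dst   ; top 22 bits = 0x%x\n",
               (unsigned)imm, ((uint32_t)imm >> 10) & 0x3fffff);
        if (imm & 0x3ff)
            printf("or    dst, 0x%x, dst  ; low 10 bits\n", imm & 0x3ff);
    }
    return 0;
}
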
+#define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2))
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same);
+
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst));
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_UB)
+                               return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst));
+                       FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst)));
+                       return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst)));
+                       return push_inst(compiler, (op == SLJIT_MOV_SH ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
+               }
+               else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               /* SPARC-32 does not support SLJIT_KEEP_FLAGS; not sure this can be fixed. */
+               FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS));
+               FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS));
+               FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS)));
+               FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst)));
+
+               /* Loop. */
+               FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
+               FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS));
+               return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS));
+
+       case SLJIT_ADD:
+               return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_ADDC:
+               return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_SUB:
+               return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_SUBC:
+               return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_MUL:
+               FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               if (!(flags & SET_FLAGS))
+                       return SLJIT_SUCCESS;
+               FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1)));
+               FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK)));
+               return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS);
+
+       case SLJIT_AND:
+               return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_OR:
+               return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_XOR:
+               return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+       case SLJIT_SHL:
+               FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+       case SLJIT_LSHR:
+               FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+       case SLJIT_ASHR:
+               FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+               return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value)
+{
+       FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst)));
+       return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff);
+       inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff);
+       SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *inst = (sljit_ins*)addr;
+
+       inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff);
+       inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff);
+       SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c
new file mode 100644 (file)
index 0000000..0b1927a
--- /dev/null
+++ b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c
@@ -0,0 +1,1430 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "SPARC" SLJIT_CPUINFO;
+}
+
+/* Length of an instruction word, for both SPARC-32 and SPARC-64. */
+typedef sljit_ui sljit_ins;
+
+static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
+{
+#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590
+       __asm (
+               /* if (from == to) return */
+               "cmp %i0, %i1\n"
+               "be .leave\n"
+               "nop\n"
+
+               /* loop until from >= to */
+               ".mainloop:\n"
+               "flush %i0\n"
+               "add %i0, 8, %i0\n"
+               "cmp %i0, %i1\n"
+               "bcs .mainloop\n"
+               "nop\n"
+
+               /* The comparison was done above. */
+               "bne .leave\n"
+               /* nop is not necessary here, since the
+                  sub operation has no side effect. */
+               "sub %i0, 4, %i0\n"
+               "flush %i0\n"
+               ".leave:"
+       );
+#else
+       if (SLJIT_UNLIKELY(from == to))
+               return;
+
+       do {
+               __asm__ volatile (
+                       "flush %0\n"
+                       : : "r"(from)
+               );
+               /* The flush instruction operates on at least a doubleword, so advance two words at a time. */
+               from += 2;
+       } while (from < to);
+
+       if (from == to) {
+               /* Flush the last word. */
+               from --;
+               __asm__ volatile (
+                       "flush %0\n"
+                       : : "r"(from)
+               );
+       }
+#endif
+}
+
+/* TMP_REG2 is not used by getput_arg */
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_LINK       (SLJIT_NUMBER_OF_REGISTERS + 5)
+
+#define TMP_FREG1      (0)
+#define TMP_FREG2      ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)
+
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+       0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+#define D(d)           (reg_map[d] << 25)
+#define DA(d)          ((d) << 25)
+#define S1(s1)         (reg_map[s1] << 14)
+#define S2(s2)         (reg_map[s2])
+#define S1A(s1)                ((s1) << 14)
+#define S2A(s2)                (s2)
+#define IMM_ARG                0x2000
+#define DOP(op)                ((op) << 5)
+#define IMM(imm)       (((imm) & 0x1fff) | IMM_ARG)
+
+#define DR(dr)         (reg_map[dr])
+#define OPC1(opcode)   ((opcode) << 30)
+#define OPC2(opcode)   ((opcode) << 22)
+#define OPC3(opcode)   ((opcode) << 19)
+#define SET_FLAGS      OPC3(0x10)
+
+#define ADD            (OPC1(0x2) | OPC3(0x00))
+#define ADDC           (OPC1(0x2) | OPC3(0x08))
+#define AND            (OPC1(0x2) | OPC3(0x01))
+#define ANDN           (OPC1(0x2) | OPC3(0x05))
+#define CALL           (OPC1(0x1))
+#define FABSS          (OPC1(0x2) | OPC3(0x34) | DOP(0x09))
+#define FADDD          (OPC1(0x2) | OPC3(0x34) | DOP(0x42))
+#define FADDS          (OPC1(0x2) | OPC3(0x34) | DOP(0x41))
+#define FCMPD          (OPC1(0x2) | OPC3(0x35) | DOP(0x52))
+#define FCMPS          (OPC1(0x2) | OPC3(0x35) | DOP(0x51))
+#define FDIVD          (OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
+#define FDIVS          (OPC1(0x2) | OPC3(0x34) | DOP(0x4d))
+#define FDTOI          (OPC1(0x2) | OPC3(0x34) | DOP(0xd2))
+#define FDTOS          (OPC1(0x2) | OPC3(0x34) | DOP(0xc6))
+#define FITOD          (OPC1(0x2) | OPC3(0x34) | DOP(0xc8))
+#define FITOS          (OPC1(0x2) | OPC3(0x34) | DOP(0xc4))
+#define FMOVS          (OPC1(0x2) | OPC3(0x34) | DOP(0x01))
+#define FMULD          (OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
+#define FMULS          (OPC1(0x2) | OPC3(0x34) | DOP(0x49))
+#define FNEGS          (OPC1(0x2) | OPC3(0x34) | DOP(0x05))
+#define FSTOD          (OPC1(0x2) | OPC3(0x34) | DOP(0xc9))
+#define FSTOI          (OPC1(0x2) | OPC3(0x34) | DOP(0xd1))
+#define FSUBD          (OPC1(0x2) | OPC3(0x34) | DOP(0x46))
+#define FSUBS          (OPC1(0x2) | OPC3(0x34) | DOP(0x45))
+#define JMPL           (OPC1(0x2) | OPC3(0x38))
+#define NOP            (OPC1(0x0) | OPC2(0x04))
+#define OR             (OPC1(0x2) | OPC3(0x02))
+#define ORN            (OPC1(0x2) | OPC3(0x06))
+#define RDY            (OPC1(0x2) | OPC3(0x28) | S1A(0))
+#define RESTORE                (OPC1(0x2) | OPC3(0x3d))
+#define SAVE           (OPC1(0x2) | OPC3(0x3c))
+#define SETHI          (OPC1(0x0) | OPC2(0x04))
+#define SLL            (OPC1(0x2) | OPC3(0x25))
+#define SLLX           (OPC1(0x2) | OPC3(0x25) | (1 << 12))
+#define SRA            (OPC1(0x2) | OPC3(0x27))
+#define SRAX           (OPC1(0x2) | OPC3(0x27) | (1 << 12))
+#define SRL            (OPC1(0x2) | OPC3(0x26))
+#define SRLX           (OPC1(0x2) | OPC3(0x26) | (1 << 12))
+#define SUB            (OPC1(0x2) | OPC3(0x04))
+#define SUBC           (OPC1(0x2) | OPC3(0x0c))
+#define TA             (OPC1(0x2) | OPC3(0x3a) | (8 << 25))
+#define WRY            (OPC1(0x2) | OPC3(0x30) | DA(0))
+#define XOR            (OPC1(0x2) | OPC3(0x03))
+#define XNOR           (OPC1(0x2) | OPC3(0x07))
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define MAX_DISP       (0x1fffff)
+#define MIN_DISP       (-0x200000)
+#define DISP_MASK      (0x3fffff)
+
+#define BICC           (OPC1(0x0) | OPC2(0x2))
+#define FBFCC          (OPC1(0x0) | OPC2(0x6))
+#define SLL_W          SLL
+#define SDIV           (OPC1(0x2) | OPC3(0x0f))
+#define SMUL           (OPC1(0x2) | OPC3(0x0b))
+#define UDIV           (OPC1(0x2) | OPC3(0x0e))
+#define UMUL           (OPC1(0x2) | OPC3(0x0a))
+#else
+#define SLL_W          SLLX
+#endif
+
+#define SIMM_MAX       (0x0fff)
+#define SIMM_MIN       (-0x1000)
+
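With the field macros above, an instruction word is assembled by OR-ing opcode and operand fields together. A self-contained sketch — illustrative only, using hardware register numbers directly (the code above maps sljit virtual registers through reg_map[] first) — that encodes add %o0, 5, %o1:

#include <stdio.h>
#include <stdint.h>

#define OPC1(opcode)  ((uint32_t)(opcode) << 30)
#define OPC3(opcode)  ((uint32_t)(opcode) << 19)
#define DA(d)         ((uint32_t)(d) << 25)
#define S1A(s1)       ((uint32_t)(s1) << 14)
#define IMM_ARG       0x2000u
#define IMM(imm)      (((uint32_t)(imm) & 0x1fff) | IMM_ARG)

int main(void)
{
    /* add: op=2, op3=0x00; %o0 = r8 (rs1), %o1 = r9 (rd), i-bit + simm13. */
    uint32_t ins = OPC1(0x2) | OPC3(0x00) | DA(9) | S1A(8) | IMM(5);
    printf("add %%o0, 5, %%o1 -> 0x%08x\n", ins);  /* 0x92022005 */
    return 0;
}
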
+/* delay_slot records the absolute (hardware) name of the destination register.
+   This is used when reordering instructions into the delay slot. */
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot)
+{
+       sljit_ins *ptr;
+       SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS
+               || (delay_slot & DST_INS_MASK) == MOVABLE_INS
+               || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f));
+       ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       compiler->delay_slot = delay_slot;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_ins *inst;
+       sljit_ins saved_inst;
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return code_ptr;
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+       inst = (sljit_ins*)jump->addr;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       if (jump->flags & IS_CALL) {
+               /* Calls are always patchable on SPARC-32. */
+               jump->flags |= PATCH_CALL;
+               if (jump->flags & IS_MOVABLE) {
+                       inst[0] = inst[-1];
+                       inst[-1] = CALL;
+                       jump->addr -= sizeof(sljit_ins);
+                       return inst;
+               }
+               inst[0] = CALL;
+               inst[1] = NOP;
+               return inst + 1;
+       }
+#else
+       /* Neither calls nor BPr instructions should reach this point. */
+#error "Implementation required"
+#endif
+
+       if (jump->flags & IS_COND)
+               inst--;
+
+       if (jump->flags & IS_MOVABLE) {
+               diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2;
+               if (diff <= MAX_DISP && diff >= MIN_DISP) {
+                       jump->flags |= PATCH_B;
+                       inst--;
+                       if (jump->flags & IS_COND) {
+                               saved_inst = inst[0];
+                               inst[0] = inst[1] ^ (1 << 28);
+                               inst[1] = saved_inst;
+                       } else {
+                               inst[1] = inst[0];
+                               inst[0] = BICC | DA(0x8);
+                       }
+                       jump->addr = (sljit_uw)inst;
+                       return inst + 1;
+               }
+       }
+
+       diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+       if (diff <= MAX_DISP && diff >= MIN_DISP) {
+               jump->flags |= PATCH_B;
+               if (jump->flags & IS_COND)
+                       inst[0] ^= (1 << 28);
+               else
+                       inst[0] = BICC | DA(0x8);
+               inst[1] = NOP;
+               jump->addr = (sljit_uw)inst;
+               return inst + 1;
+       }
+
+       return code_ptr;
+}
+
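A SPARC Bicc branch carries a signed 22-bit word displacement, so detect_jump_type() above only rewrites a jump into a short branch (PATCH_B) when (target - branch) >> 2 fits in [MIN_DISP, MAX_DISP]; otherwise the two-instruction sethi/or constant load is kept. A sketch of that reachability test, illustrative only:

#include <stdio.h>
#include <stdint.h>

#define MAX_DISP  (0x1fffff)
#define MIN_DISP  (-0x200000)

static int branch_reachable(uintptr_t branch_addr, uintptr_t target_addr)
{
    intptr_t diff = ((intptr_t)target_addr - (intptr_t)branch_addr) >> 2;
    return diff <= MAX_DISP && diff >= MIN_DISP;
}

int main(void)
{
    printf("%d %d\n",
           branch_reachable(0x10000, 0x10000 + (1 << 20)),   /* 1: in range  */
           branch_reachable(0x10000, 0x10000 + (1 << 24)));  /* 0: too far   */
    return 0;
}
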
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_generate_code(compiler));
+       reverse_buf(compiler);
+
+       code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins*)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 2);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               label->addr = (sljit_uw)code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+                       if (jump && jump->addr == word_count) {
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+                               jump->addr = (sljit_uw)(code_ptr - 3);
+#else
+                               jump->addr = (sljit_uw)(code_ptr - 6);
+#endif
+                               code_ptr = detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+                       if (const_ && const_->addr == word_count) {
+                               /* Just recording the address. */
+                               const_->addr = (sljit_uw)code_ptr;
+                               const_ = const_->next;
+                       }
+                       code_ptr++;
+                       word_count++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw)code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_si)compiler->size);
+
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins*)jump->addr;
+
+                       if (jump->flags & PATCH_CALL) {
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000);
+                               buf_ptr[0] = CALL | (addr & 0x3fffffff);
+                               break;
+                       }
+                       if (jump->flags & PATCH_B) {
+                               addr = (sljit_sw)(addr - jump->addr) >> 2;
+                               SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP);
+                               buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
+                               break;
+                       }
+
+                       /* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+                       buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
+                       buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
+#else
+#error "Implementation required"
+#endif
+               } while (0);
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       return code;
+}
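+
+/* sljit_generate_code() above works in two passes: the first pass copies
+   the buffered instruction words into executable memory while resolving
+   label, jump and const addresses; the second pass patches each jump in
+   place, either as a CALL displacement (PATCH_CALL), a branch
+   displacement (PATCH_B), or by rewriting the sethi/or immediate-load
+   pair with the final absolute address. */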
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Creates an index in data_transfer_insts array. */
+#define LOAD_DATA      0x01
+#define WORD_DATA      0x00
+#define BYTE_DATA      0x02
+#define HALF_DATA      0x04
+#define INT_DATA       0x06
+#define SIGNED_DATA    0x08
+/* Separates integer and floating point registers */
+#define GPR_REG                0x0f
+#define DOUBLE_DATA    0x10
+#define SINGLE_DATA    0x12
+
+#define MEM_MASK       0x1f
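+
+/* Example: a signed byte load composes BYTE_DATA | SIGNED_DATA | LOAD_DATA
+   == 0x0b, which indexes the "s b l" (ldsb) entry of the
+   data_transfer_insts table below. */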
+
+#define WRITE_BACK     0x00020
+#define ARG_TEST       0x00040
+#define ALT_KEEP_CACHE 0x00080
+#define CUMULATIVE_OP  0x00100
+#define IMM_OP         0x00200
+#define SRC2_IMM       0x00400
+
+#define REG_DEST       0x00800
+#define REG2_SOURCE    0x01000
+#define SLOW_SRC1      0x02000
+#define SLOW_SRC2      0x04000
+#define SLOW_DEST      0x08000
+
+/* SET_FLAGS (0x10 << 19) also belongs here! */
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#include "sljitNativeSPARC_32.c"
+#else
+#include "sljitNativeSPARC_64.c"
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
+       compiler->local_size = local_size;
+
+       if (local_size <= SIMM_MAX) {
+               FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS));
+       }
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size));
+               FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS));
+       }
+
+       /* Arguments are in their appropriate registers. */
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+       if (op != SLJIT_MOV || !FAST_IS_REG(src)) {
+               FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+               src = SLJIT_R0;
+       }
+
+       FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
+       return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define ARCH_32_64(a, b)       a
+#else
+#define ARCH_32_64(a, b)       b
+#endif
+
+static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = {
+/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* u b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */,
+/* u h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */,
+/* u i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */,
+
+/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* s b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */,
+/* s h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */,
+/* s i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */),
+
+/* d   s */ OPC1(3) | OPC3(0x27),
+/* d   l */ OPC1(3) | OPC3(0x23),
+/* s   s */ OPC1(3) | OPC3(0x24),
+/* s   l */ OPC1(3) | OPC3(0x20),
+};
+
+#undef ARCH_32_64
+
+/* Emits the memory access when it can be done with at most one instruction; with ARG_TEST it only checks. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if (!(flags & WRITE_BACK) || !(arg & REG_MASK)) {
+               if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN)
+                               || ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) {
+                       /* Works for both absolute and relative addresses (immediate case). */
+                       if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                               return 1;
+                       FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
+                               | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg))
+                               | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
+                               ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write-back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       /* Simple operation except for updates. */
+       if (arg & OFFS_REG_MASK) {
+               argw &= 0x3;
+               SLJIT_ASSERT(argw);
+               next_argw &= 0x3;
+               if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw)
+                       return 1;
+               return 0;
+       }
+
+       if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN))
+               return 1;
+       return 0;
+}
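+
+/* Caching scheme: TMP_REG3 holds either a pre-shifted index register or a
+   loaded base offset, and compiler->cache_arg / compiler->cache_argw
+   record what it contains. A subsequent memory operand that matches the
+   cached value, or lies within simm13 range of it, reuses TMP_REG3, at
+   most adjusting it with a single ADD (see getput_arg below). */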
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si base, arg2, delay_slot;
+       sljit_ins dest;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       base = arg & REG_MASK;
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3;
+               SLJIT_ASSERT(argw != 0);
+
+               /* Using the cache. */
+               if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw))
+                       arg2 = TMP_REG3;
+               else {
+                       if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
+                               compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                               compiler->cache_argw = argw;
+                               arg2 = TMP_REG3;
+                       }
+                       else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg))
+                               arg2 = reg;
+                       else /* It must be a mov operation, so tmp1 must be free to use. */
+                               arg2 = TMP_REG1;
+                       FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2)));
+               }
+       }
+       else {
+               /* Using the cache. */
+               if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) {
+                       if (argw != compiler->cache_argw) {
+                               FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+                               compiler->cache_argw = argw;
+                       }
+                       arg2 = TMP_REG3;
+               } else {
+                       if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) {
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               arg2 = TMP_REG3;
+                       }
+                       else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base)
+                               arg2 = reg;
+                       else /* It must be a mov operation, so tmp1 must be free to use. */
+                               arg2 = TMP_REG1;
+                       FAIL_IF(load_immediate(compiler, arg2, argw));
+               }
+       }
+
+       dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg));
+       delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
+       if (!base)
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
+       if (!(flags & WRITE_BACK))
+               return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
+       FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot));
+       return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base));
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg, argw))
+               return compiler->error;
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return compiler->error;
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
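+
+/* emit_op_mem() resets the cache before a standalone access, while
+   emit_op_mem2() keeps it and passes the next operand so getput_arg()
+   can prime TMP_REG3 for the access that follows. */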
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* arg1 goes to TMP_REG1 or src reg
+          arg2 goes to TMP_REG2, imm or src reg
+          TMP_REG3 can be used for caching
+          result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+       sljit_si dst_r = TMP_REG2;
+       sljit_si src1_r;
+       sljit_sw src2_r = 0;
+       sljit_si sugg_src2_r = TMP_REG2;
+
+       if (!(flags & ALT_KEEP_CACHE)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+                       return SLJIT_SUCCESS;
+       }
+       else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r;
+       }
+       else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
+               flags |= SLOW_DEST;
+
+       if (flags & IMM_OP) {
+               if ((src2 & SLJIT_IMM) && src2w) {
+                       if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
+                               flags |= SRC2_IMM;
+                               src2_r = src2w;
+                       }
+               }
+               if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
+                       if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
+                               flags |= SRC2_IMM;
+                               src2_r = src1w;
+
+                               /* And swap arguments. */
+                               src1 = src2;
+                               src1w = src2w;
+                               src2 = SLJIT_IMM;
+                               /* src2w = src2_r unneeded. */
+                       }
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1))
+               src1_r = src1;
+       else if (src1 & SLJIT_IMM) {
+               if (src1w) {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+                       src1_r = TMP_REG1;
+               }
+               else
+                       src1_r = 0;
+       }
+       else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+               src1_r = TMP_REG1;
+       }
+
+       /* Source 2. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       dst_r = src2_r;
+       }
+       else if (src2 & SLJIT_IMM) {
+               if (!(flags & SRC2_IMM)) {
+                       if (src2w) {
+                               FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
+                               src2_r = sugg_src2_r;
+                       }
+                       else {
+                               src2_r = 0;
+                               if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
+                                       dst_r = 0;
+                       }
+               }
+       }
+       else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+               src2_r = sugg_src2_r;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               SLJIT_ASSERT(src2_r == TMP_REG2);
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, flags, dst_r, dst, dstw);
+                       return compiler->error;
+               }
+               return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op0(compiler, op));
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_BREAKPOINT:
+               return push_inst(compiler, TA, UNMOVABLE_INS);
+       case SLJIT_NOP:
+               return push_inst(compiler, NOP, UNMOVABLE_INS);
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
+               return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1));
+#else
+#error "Implementation required"
+#endif
+       case SLJIT_LUDIV:
+       case SLJIT_LSDIV:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               if (op == SLJIT_LUDIV)
+                       FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS));
+               else {
+                       FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1)));
+                       FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
+               }
+               FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2)));
+               FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
+               FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1)));
+               FAIL_IF(push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)));
+               return SLJIT_SUCCESS;
+#else
+#error "Implementation required"
+#endif
+       }
+
+       return SLJIT_SUCCESS;
+}
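+
+/* Note on the division sequence above: SPARC-32 udiv/sdiv take the upper
+   half of the 64-bit dividend from the Y register (zeroed or sign-filled
+   via WRY) and yield only the quotient, so the remainder is reconstructed
+   as R1 = old_R0 - quotient * R1, with old_R0 saved in TMP_REG2. */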
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UI:
+               return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_SI:
+               return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOV_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOV_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOV_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+               return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UI:
+               return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_SI:
+               return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw);
+
+       case SLJIT_MOVU_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw);
+
+       case SLJIT_MOVU_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw);
+
+       case SLJIT_MOVU_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw);
+
+       case SLJIT_NOT:
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+               return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_ADD:
+       case SLJIT_ADDC:
+       case SLJIT_MUL:
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB:
+       case SLJIT_SUBC:
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+       case SLJIT_ASHR:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               if (src2 & SLJIT_IMM)
+                       src2w &= 0x1f;
+#else
+               SLJIT_ASSERT_STOP();
+#endif
+               return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg << 1;
+}
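+
+/* SLJIT floating point registers map to even-numbered SPARC FPU registers
+   (reg << 1), since a double occupies an even/odd register pair. This is
+   why the fop helpers below shift register operands left by one and touch
+   reg | 1 for the second single-precision half of a double. */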
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+       return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#else
+       /* Available by default. */
+       return 1;
+#endif
+}
+
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7))
+#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
+#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
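+
+/* FLOAT_DATA() relies on SLJIT_SINGLE_OP being 0x100 (checked by the
+   SLJIT_COMPILE_ASSERT in sljit_emit_fop1): 0x100 >> 7 == 2, turning
+   DOUBLE_DATA (0x10) into SINGLE_DATA (0x12) for single precision ops. */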
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+               src = TMP_FREG1;
+       }
+       else
+               src <<= 1;
+
+       FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst)) {
+               FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+               return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET);
+       }
+
+       /* Store the integer value from a VFP register. */
+       return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+       if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+                       srcw = (sljit_si)srcw;
+#endif
+               FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       if (FAST_IS_REG(src)) {
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+               src = SLJIT_MEM1(SLJIT_SP);
+               srcw = FLOAT_TMP_MEM_OFFSET;
+       }
+
+       FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+       FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS));
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       if (src1 & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+               src1 = TMP_FREG1;
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+               src2 = TMP_FREG2;
+       }
+       else
+               src2 <<= 1;
+
+       return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+       if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+               op ^= SLJIT_SINGLE_OP;
+
+       dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+       if (src & SLJIT_MEM) {
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+               src = dst_r;
+       }
+       else
+               src <<= 1;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DMOV:
+               if (src != dst_r) {
+                       if (dst_r != TMP_FREG1) {
+                               FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS));
+                               if (!(op & SLJIT_SINGLE_OP))
+                                       FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+                       }
+                       else
+                               dst_r = src;
+               }
+               break;
+       case SLJIT_DNEG:
+               FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS));
+               if (dst_r != src && !(op & SLJIT_SINGLE_OP))
+                       FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+               break;
+       case SLJIT_DABS:
+               FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS));
+               if (dst_r != src && !(op & SLJIT_SINGLE_OP))
+                       FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+               break;
+       case SLJIT_CONVD_FROMS:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS));
+               op ^= SLJIT_SINGLE_OP;
+               break;
+       }
+
+       if (dst & SLJIT_MEM)
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r, flags = 0;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+
+       dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+
+       if (src1 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+                       FAIL_IF(compiler->error);
+                       src1 = TMP_FREG1;
+               } else
+                       flags |= SLOW_SRC1;
+       }
+       else
+               src1 <<= 1;
+
+       if (src2 & SLJIT_MEM) {
+               if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+                       FAIL_IF(compiler->error);
+                       src2 = TMP_FREG2;
+               } else
+                       flags |= SLOW_SRC2;
+       }
+       else
+               src2 <<= 1;
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+               }
+               else {
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+               }
+       }
+       else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+       if (flags & SLOW_SRC1)
+               src1 = TMP_FREG1;
+       if (flags & SLOW_SRC2)
+               src2 = TMP_FREG2;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DADD:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+
+       case SLJIT_DSUB:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+
+       case SLJIT_DMUL:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+
+       case SLJIT_DDIV:
+               FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+               break;
+       }
+
+       if (dst_r == TMP_FREG2)
+               FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+       return SLJIT_SUCCESS;
+}
+
+#undef FLOAT_DATA
+#undef SELECT_FOP
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst));
+
+       /* Memory. */
+       return emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK)));
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw));
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, TMP_LINK, srcw));
+
+       FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS));
+       return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_label(compiler));
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       compiler->delay_slot = UNMOVABLE_INS;
+       return label;
+}
+
+static sljit_ins get_cc(sljit_si type)
+{
+       switch (type) {
+       case SLJIT_EQUAL:
+       case SLJIT_MUL_NOT_OVERFLOW:
+       case SLJIT_D_NOT_EQUAL: /* Unordered. */
+               return DA(0x1);
+
+       case SLJIT_NOT_EQUAL:
+       case SLJIT_MUL_OVERFLOW:
+       case SLJIT_D_EQUAL:
+               return DA(0x9);
+
+       case SLJIT_LESS:
+       case SLJIT_D_GREATER: /* Unordered. */
+               return DA(0x5);
+
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               return DA(0xd);
+
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER_EQUAL: /* Unordered. */
+               return DA(0xc);
+
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS:
+               return DA(0x4);
+
+       case SLJIT_SIG_LESS:
+               return DA(0x3);
+
+       case SLJIT_SIG_GREATER_EQUAL:
+               return DA(0xb);
+
+       case SLJIT_SIG_GREATER:
+               return DA(0xa);
+
+       case SLJIT_SIG_LESS_EQUAL:
+               return DA(0x2);
+
+       case SLJIT_OVERFLOW:
+       case SLJIT_D_UNORDERED:
+               return DA(0x7);
+
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_D_ORDERED:
+               return DA(0xf);
+
+       default:
+               SLJIT_ASSERT_STOP();
+               return DA(0x8);
+       }
+}
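+
+/* get_cc() returns the 4-bit SPARC condition code positioned by DA() in
+   bits 28:25 of a Bicc/FBfcc instruction. Complementary conditions differ
+   only in the top bit, so a condition is inverted by XOR-ing bit 28; this
+   is what detect_jump_type() exploits, and why sljit_emit_jump() below
+   can emit the complemented branch via get_cc(type ^ 1). */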
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       if (type < SLJIT_D_EQUAL) {
+               jump->flags |= IS_COND;
+               if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET))
+                       jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
+#else
+#error "Implementation required"
+#endif
+       }
+       else if (type < SLJIT_JUMP) {
+               jump->flags |= IS_COND;
+               if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET))
+                       jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+               PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
+#else
+#error "Implementation required"
+#endif
+       } else {
+               if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+                       jump->flags |= IS_MOVABLE;
+               if (type >= SLJIT_FAST_CALL)
+                       jump->flags |= IS_CALL;
+       }
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+       PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS));
+       jump->addr = compiler->size;
+       PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+
+       return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       struct sljit_jump *jump = NULL;
+       sljit_si src_r;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               src_r = src;
+       else if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR);
+               jump->u.target = srcw;
+               if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+                       jump->flags |= IS_MOVABLE;
+               if (type >= SLJIT_FAST_CALL)
+                       jump->flags |= IS_CALL;
+
+               FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+               src_r = TMP_REG2;
+       }
+       else {
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
+               src_r = TMP_REG2;
+       }
+
+       FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS));
+       if (jump)
+               jump->addr = compiler->size;
+       return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_si reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0);
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+       op = GET_OPCODE(op);
+       reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
+               ADJUST_LOCAL_OFFSET(src, srcw);
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       type &= 0xff;
+       if (type < SLJIT_D_EQUAL)
+               FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS));
+       else
+               FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS));
+
+       FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
+       FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));
+
+       if (op >= SLJIT_ADD)
+               return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+
+       return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+#else
+#error "Implementation required"
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       sljit_si reg;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+       return const_;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeTILEGX-encoder.c b/ext/pcre/pcrelib/sljit/sljitNativeTILEGX-encoder.c
new file mode 100644 (file)
index 0000000..7196329
--- /dev/null
@@ -0,0 +1,10159 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This code is owned by Tilera Corporation, and distributed as part
+   of multiple projects. In sljit, the code is under BSD licence.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define BFD_RELOC(x) R_##x
+
+/* Special registers. */
+#define TREG_LR 55
+#define TREG_SN 56
+#define TREG_ZERO 63
+
+/* Canonical name of each register. */
+const char *const tilegx_register_names[] =
+{
+  "r0",   "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+  "r8",   "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+  "r16",  "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+  "r24",  "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+  "r32",  "r33", "r34", "r35", "r36", "r37", "r38", "r39",
+  "r40",  "r41", "r42", "r43", "r44", "r45", "r46", "r47",
+  "r48",  "r49", "r50", "r51", "r52", "tp",  "sp",  "lr",
+  "sn",  "idn0", "idn1", "udn0", "udn1", "udn2", "udn3", "zero"
+};
+
+enum
+{
+  R_NONE = 0,
+  R_TILEGX_NONE = 0,
+  R_TILEGX_64 = 1,
+  R_TILEGX_32 = 2,
+  R_TILEGX_16 = 3,
+  R_TILEGX_8 = 4,
+  R_TILEGX_64_PCREL = 5,
+  R_TILEGX_32_PCREL = 6,
+  R_TILEGX_16_PCREL = 7,
+  R_TILEGX_8_PCREL = 8,
+  R_TILEGX_HW0 = 9,
+  R_TILEGX_HW1 = 10,
+  R_TILEGX_HW2 = 11,
+  R_TILEGX_HW3 = 12,
+  R_TILEGX_HW0_LAST = 13,
+  R_TILEGX_HW1_LAST = 14,
+  R_TILEGX_HW2_LAST = 15,
+  R_TILEGX_COPY = 16,
+  R_TILEGX_GLOB_DAT = 17,
+  R_TILEGX_JMP_SLOT = 18,
+  R_TILEGX_RELATIVE = 19,
+  R_TILEGX_BROFF_X1 = 20,
+  R_TILEGX_JUMPOFF_X1 = 21,
+  R_TILEGX_JUMPOFF_X1_PLT = 22,
+  R_TILEGX_IMM8_X0 = 23,
+  R_TILEGX_IMM8_Y0 = 24,
+  R_TILEGX_IMM8_X1 = 25,
+  R_TILEGX_IMM8_Y1 = 26,
+  R_TILEGX_DEST_IMM8_X1 = 27,
+  R_TILEGX_MT_IMM14_X1 = 28,
+  R_TILEGX_MF_IMM14_X1 = 29,
+  R_TILEGX_MMSTART_X0 = 30,
+  R_TILEGX_MMEND_X0 = 31,
+  R_TILEGX_SHAMT_X0 = 32,
+  R_TILEGX_SHAMT_X1 = 33,
+  R_TILEGX_SHAMT_Y0 = 34,
+  R_TILEGX_SHAMT_Y1 = 35,
+  R_TILEGX_IMM16_X0_HW0 = 36,
+  R_TILEGX_IMM16_X1_HW0 = 37,
+  R_TILEGX_IMM16_X0_HW1 = 38,
+  R_TILEGX_IMM16_X1_HW1 = 39,
+  R_TILEGX_IMM16_X0_HW2 = 40,
+  R_TILEGX_IMM16_X1_HW2 = 41,
+  R_TILEGX_IMM16_X0_HW3 = 42,
+  R_TILEGX_IMM16_X1_HW3 = 43,
+  R_TILEGX_IMM16_X0_HW0_LAST = 44,
+  R_TILEGX_IMM16_X1_HW0_LAST = 45,
+  R_TILEGX_IMM16_X0_HW1_LAST = 46,
+  R_TILEGX_IMM16_X1_HW1_LAST = 47,
+  R_TILEGX_IMM16_X0_HW2_LAST = 48,
+  R_TILEGX_IMM16_X1_HW2_LAST = 49,
+  R_TILEGX_IMM16_X0_HW0_PCREL = 50,
+  R_TILEGX_IMM16_X1_HW0_PCREL = 51,
+  R_TILEGX_IMM16_X0_HW1_PCREL = 52,
+  R_TILEGX_IMM16_X1_HW1_PCREL = 53,
+  R_TILEGX_IMM16_X0_HW2_PCREL = 54,
+  R_TILEGX_IMM16_X1_HW2_PCREL = 55,
+  R_TILEGX_IMM16_X0_HW3_PCREL = 56,
+  R_TILEGX_IMM16_X1_HW3_PCREL = 57,
+  R_TILEGX_IMM16_X0_HW0_LAST_PCREL = 58,
+  R_TILEGX_IMM16_X1_HW0_LAST_PCREL = 59,
+  R_TILEGX_IMM16_X0_HW1_LAST_PCREL = 60,
+  R_TILEGX_IMM16_X1_HW1_LAST_PCREL = 61,
+  R_TILEGX_IMM16_X0_HW2_LAST_PCREL = 62,
+  R_TILEGX_IMM16_X1_HW2_LAST_PCREL = 63,
+  R_TILEGX_IMM16_X0_HW0_GOT = 64,
+  R_TILEGX_IMM16_X1_HW0_GOT = 65,
+
+  R_TILEGX_IMM16_X0_HW0_PLT_PCREL = 66,
+  R_TILEGX_IMM16_X1_HW0_PLT_PCREL = 67,
+  R_TILEGX_IMM16_X0_HW1_PLT_PCREL = 68,
+  R_TILEGX_IMM16_X1_HW1_PLT_PCREL = 69,
+  R_TILEGX_IMM16_X0_HW2_PLT_PCREL = 70,
+  R_TILEGX_IMM16_X1_HW2_PLT_PCREL = 71,
+
+  R_TILEGX_IMM16_X0_HW0_LAST_GOT = 72,
+  R_TILEGX_IMM16_X1_HW0_LAST_GOT = 73,
+  R_TILEGX_IMM16_X0_HW1_LAST_GOT = 74,
+  R_TILEGX_IMM16_X1_HW1_LAST_GOT = 75,
+  R_TILEGX_IMM16_X0_HW0_TLS_GD = 78,
+  R_TILEGX_IMM16_X1_HW0_TLS_GD = 79,
+  R_TILEGX_IMM16_X0_HW0_TLS_LE = 80,
+  R_TILEGX_IMM16_X1_HW0_TLS_LE = 81,
+  R_TILEGX_IMM16_X0_HW0_LAST_TLS_LE = 82,
+  R_TILEGX_IMM16_X1_HW0_LAST_TLS_LE = 83,
+  R_TILEGX_IMM16_X0_HW1_LAST_TLS_LE = 84,
+  R_TILEGX_IMM16_X1_HW1_LAST_TLS_LE = 85,
+  R_TILEGX_IMM16_X0_HW0_LAST_TLS_GD = 86,
+  R_TILEGX_IMM16_X1_HW0_LAST_TLS_GD = 87,
+  R_TILEGX_IMM16_X0_HW1_LAST_TLS_GD = 88,
+  R_TILEGX_IMM16_X1_HW1_LAST_TLS_GD = 89,
+  R_TILEGX_IMM16_X0_HW0_TLS_IE = 92,
+  R_TILEGX_IMM16_X1_HW0_TLS_IE = 93,
+
+  R_TILEGX_IMM16_X0_HW0_LAST_PLT_PCREL = 94,
+  R_TILEGX_IMM16_X1_HW0_LAST_PLT_PCREL = 95,
+  R_TILEGX_IMM16_X0_HW1_LAST_PLT_PCREL = 96,
+  R_TILEGX_IMM16_X1_HW1_LAST_PLT_PCREL = 97,
+  R_TILEGX_IMM16_X0_HW2_LAST_PLT_PCREL = 98,
+  R_TILEGX_IMM16_X1_HW2_LAST_PLT_PCREL = 99,
+
+  R_TILEGX_IMM16_X0_HW0_LAST_TLS_IE = 100,
+  R_TILEGX_IMM16_X1_HW0_LAST_TLS_IE = 101,
+  R_TILEGX_IMM16_X0_HW1_LAST_TLS_IE = 102,
+  R_TILEGX_IMM16_X1_HW1_LAST_TLS_IE = 103,
+  R_TILEGX_TLS_DTPMOD64 = 106,
+  R_TILEGX_TLS_DTPOFF64 = 107,
+  R_TILEGX_TLS_TPOFF64 = 108,
+  R_TILEGX_TLS_DTPMOD32 = 109,
+  R_TILEGX_TLS_DTPOFF32 = 110,
+  R_TILEGX_TLS_TPOFF32 = 111,
+  R_TILEGX_TLS_GD_CALL = 112,
+  R_TILEGX_IMM8_X0_TLS_GD_ADD = 113,
+  R_TILEGX_IMM8_X1_TLS_GD_ADD = 114,
+  R_TILEGX_IMM8_Y0_TLS_GD_ADD = 115,
+  R_TILEGX_IMM8_Y1_TLS_GD_ADD = 116,
+  R_TILEGX_TLS_IE_LOAD = 117,
+  R_TILEGX_IMM8_X0_TLS_ADD = 118,
+  R_TILEGX_IMM8_X1_TLS_ADD = 119,
+  R_TILEGX_IMM8_Y0_TLS_ADD = 120,
+  R_TILEGX_IMM8_Y1_TLS_ADD = 121,
+  R_TILEGX_GNU_VTINHERIT = 128,
+  R_TILEGX_GNU_VTENTRY = 129,
+  R_TILEGX_IRELATIVE = 130,
+  R_TILEGX_NUM = 131
+};
+
+typedef enum
+{
+  TILEGX_PIPELINE_X0,
+  TILEGX_PIPELINE_X1,
+  TILEGX_PIPELINE_Y0,
+  TILEGX_PIPELINE_Y1,
+  TILEGX_PIPELINE_Y2,
+} tilegx_pipeline;
+
+typedef unsigned long long tilegx_bundle_bits;
+
+/* These are the bits that determine if a bundle is in the X encoding. */
+#define TILEGX_BUNDLE_MODE_MASK ((tilegx_bundle_bits)3 << 62)
+
+enum
+{
+  /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */
+  TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE = 3,
+
+  /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */
+  TILEGX_NUM_PIPELINE_ENCODINGS = 5,
+
+  /* Log base 2 of TILEGX_BUNDLE_SIZE_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES = 3,
+
+  /* Instructions take this many bytes. */
+  TILEGX_BUNDLE_SIZE_IN_BYTES = 1 << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES,
+
+  /* Log base 2 of TILEGX_BUNDLE_ALIGNMENT_IN_BYTES. */
+  TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3,
+
+  /* Bundles should be aligned modulo this number of bytes. */
+  TILEGX_BUNDLE_ALIGNMENT_IN_BYTES =
+    (1 << TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES),
+
+  /* Number of registers (some are magic, such as network I/O). */
+  TILEGX_NUM_REGISTERS = 64,
+};
+
+/* Make a few "tile_" variables to simplify common code between
+   architectures.  */
+
+typedef tilegx_bundle_bits tile_bundle_bits;
+#define TILE_BUNDLE_SIZE_IN_BYTES TILEGX_BUNDLE_SIZE_IN_BYTES
+#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
+  TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+
+/* 64-bit pattern for a { bpt ; nop } bundle. */
+#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL
+
+typedef enum
+{
+  TILEGX_OP_TYPE_REGISTER,
+  TILEGX_OP_TYPE_IMMEDIATE,
+  TILEGX_OP_TYPE_ADDRESS,
+  TILEGX_OP_TYPE_SPR
+} tilegx_operand_type;
+
+struct tilegx_operand
+{
+  /* Is this operand a register, immediate or address? */
+  tilegx_operand_type type;
+
+  /* The default relocation type for this operand.  */
+  signed int default_reloc : 16;
+
+  /* How many bits is this value? (used for range checking) */
+  unsigned int num_bits : 5;
+
+  /* Is the value signed? (used for range checking) */
+  unsigned int is_signed : 1;
+
+  /* Is this operand a source register? */
+  unsigned int is_src_reg : 1;
+
+  /* Is this operand written? (i.e. is it a destination register) */
+  unsigned int is_dest_reg : 1;
+
+  /* Is this operand PC-relative? */
+  unsigned int is_pc_relative : 1;
+
+  /* By how many bits do we right shift the value before inserting? */
+  unsigned int rightshift : 2;
+
+  /* Return the bits for this operand to be ORed into an existing bundle. */
+  tilegx_bundle_bits (*insert) (int op);
+
+  /* Extract this operand and return it. */
+  unsigned int (*extract) (tilegx_bundle_bits bundle);
+};
+
+typedef enum
+{
+  TILEGX_OPC_BPT,
+  TILEGX_OPC_INFO,
+  TILEGX_OPC_INFOL,
+  TILEGX_OPC_LD4S_TLS,
+  TILEGX_OPC_LD_TLS,
+  TILEGX_OPC_MOVE,
+  TILEGX_OPC_MOVEI,
+  TILEGX_OPC_MOVELI,
+  TILEGX_OPC_PREFETCH,
+  TILEGX_OPC_PREFETCH_ADD_L1,
+  TILEGX_OPC_PREFETCH_ADD_L1_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L2,
+  TILEGX_OPC_PREFETCH_ADD_L2_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_L1,
+  TILEGX_OPC_PREFETCH_L1_FAULT,
+  TILEGX_OPC_PREFETCH_L2,
+  TILEGX_OPC_PREFETCH_L2_FAULT,
+  TILEGX_OPC_PREFETCH_L3,
+  TILEGX_OPC_PREFETCH_L3_FAULT,
+  TILEGX_OPC_RAISE,
+  TILEGX_OPC_ADD,
+  TILEGX_OPC_ADDI,
+  TILEGX_OPC_ADDLI,
+  TILEGX_OPC_ADDX,
+  TILEGX_OPC_ADDXI,
+  TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXSC,
+  TILEGX_OPC_AND,
+  TILEGX_OPC_ANDI,
+  TILEGX_OPC_BEQZ,
+  TILEGX_OPC_BEQZT,
+  TILEGX_OPC_BFEXTS,
+  TILEGX_OPC_BFEXTU,
+  TILEGX_OPC_BFINS,
+  TILEGX_OPC_BGEZ,
+  TILEGX_OPC_BGEZT,
+  TILEGX_OPC_BGTZ,
+  TILEGX_OPC_BGTZT,
+  TILEGX_OPC_BLBC,
+  TILEGX_OPC_BLBCT,
+  TILEGX_OPC_BLBS,
+  TILEGX_OPC_BLBST,
+  TILEGX_OPC_BLEZ,
+  TILEGX_OPC_BLEZT,
+  TILEGX_OPC_BLTZ,
+  TILEGX_OPC_BLTZT,
+  TILEGX_OPC_BNEZ,
+  TILEGX_OPC_BNEZT,
+  TILEGX_OPC_CLZ,
+  TILEGX_OPC_CMOVEQZ,
+  TILEGX_OPC_CMOVNEZ,
+  TILEGX_OPC_CMPEQ,
+  TILEGX_OPC_CMPEQI,
+  TILEGX_OPC_CMPEXCH,
+  TILEGX_OPC_CMPEXCH4,
+  TILEGX_OPC_CMPLES,
+  TILEGX_OPC_CMPLEU,
+  TILEGX_OPC_CMPLTS,
+  TILEGX_OPC_CMPLTSI,
+  TILEGX_OPC_CMPLTU,
+  TILEGX_OPC_CMPLTUI,
+  TILEGX_OPC_CMPNE,
+  TILEGX_OPC_CMUL,
+  TILEGX_OPC_CMULA,
+  TILEGX_OPC_CMULAF,
+  TILEGX_OPC_CMULF,
+  TILEGX_OPC_CMULFR,
+  TILEGX_OPC_CMULH,
+  TILEGX_OPC_CMULHR,
+  TILEGX_OPC_CRC32_32,
+  TILEGX_OPC_CRC32_8,
+  TILEGX_OPC_CTZ,
+  TILEGX_OPC_DBLALIGN,
+  TILEGX_OPC_DBLALIGN2,
+  TILEGX_OPC_DBLALIGN4,
+  TILEGX_OPC_DBLALIGN6,
+  TILEGX_OPC_DRAIN,
+  TILEGX_OPC_DTLBPR,
+  TILEGX_OPC_EXCH,
+  TILEGX_OPC_EXCH4,
+  TILEGX_OPC_FDOUBLE_ADD_FLAGS,
+  TILEGX_OPC_FDOUBLE_ADDSUB,
+  TILEGX_OPC_FDOUBLE_MUL_FLAGS,
+  TILEGX_OPC_FDOUBLE_PACK1,
+  TILEGX_OPC_FDOUBLE_PACK2,
+  TILEGX_OPC_FDOUBLE_SUB_FLAGS,
+  TILEGX_OPC_FDOUBLE_UNPACK_MAX,
+  TILEGX_OPC_FDOUBLE_UNPACK_MIN,
+  TILEGX_OPC_FETCHADD,
+  TILEGX_OPC_FETCHADD4,
+  TILEGX_OPC_FETCHADDGEZ,
+  TILEGX_OPC_FETCHADDGEZ4,
+  TILEGX_OPC_FETCHAND,
+  TILEGX_OPC_FETCHAND4,
+  TILEGX_OPC_FETCHOR,
+  TILEGX_OPC_FETCHOR4,
+  TILEGX_OPC_FINV,
+  TILEGX_OPC_FLUSH,
+  TILEGX_OPC_FLUSHWB,
+  TILEGX_OPC_FNOP,
+  TILEGX_OPC_FSINGLE_ADD1,
+  TILEGX_OPC_FSINGLE_ADDSUB2,
+  TILEGX_OPC_FSINGLE_MUL1,
+  TILEGX_OPC_FSINGLE_MUL2,
+  TILEGX_OPC_FSINGLE_PACK1,
+  TILEGX_OPC_FSINGLE_PACK2,
+  TILEGX_OPC_FSINGLE_SUB1,
+  TILEGX_OPC_ICOH,
+  TILEGX_OPC_ILL,
+  TILEGX_OPC_INV,
+  TILEGX_OPC_IRET,
+  TILEGX_OPC_J,
+  TILEGX_OPC_JAL,
+  TILEGX_OPC_JALR,
+  TILEGX_OPC_JALRP,
+  TILEGX_OPC_JR,
+  TILEGX_OPC_JRP,
+  TILEGX_OPC_LD,
+  TILEGX_OPC_LD1S,
+  TILEGX_OPC_LD1S_ADD,
+  TILEGX_OPC_LD1U,
+  TILEGX_OPC_LD1U_ADD,
+  TILEGX_OPC_LD2S,
+  TILEGX_OPC_LD2S_ADD,
+  TILEGX_OPC_LD2U,
+  TILEGX_OPC_LD2U_ADD,
+  TILEGX_OPC_LD4S,
+  TILEGX_OPC_LD4S_ADD,
+  TILEGX_OPC_LD4U,
+  TILEGX_OPC_LD4U_ADD,
+  TILEGX_OPC_LD_ADD,
+  TILEGX_OPC_LDNA,
+  TILEGX_OPC_LDNA_ADD,
+  TILEGX_OPC_LDNT,
+  TILEGX_OPC_LDNT1S,
+  TILEGX_OPC_LDNT1S_ADD,
+  TILEGX_OPC_LDNT1U,
+  TILEGX_OPC_LDNT1U_ADD,
+  TILEGX_OPC_LDNT2S,
+  TILEGX_OPC_LDNT2S_ADD,
+  TILEGX_OPC_LDNT2U,
+  TILEGX_OPC_LDNT2U_ADD,
+  TILEGX_OPC_LDNT4S,
+  TILEGX_OPC_LDNT4S_ADD,
+  TILEGX_OPC_LDNT4U,
+  TILEGX_OPC_LDNT4U_ADD,
+  TILEGX_OPC_LDNT_ADD,
+  TILEGX_OPC_LNK,
+  TILEGX_OPC_MF,
+  TILEGX_OPC_MFSPR,
+  TILEGX_OPC_MM,
+  TILEGX_OPC_MNZ,
+  TILEGX_OPC_MTSPR,
+  TILEGX_OPC_MUL_HS_HS,
+  TILEGX_OPC_MUL_HS_HU,
+  TILEGX_OPC_MUL_HS_LS,
+  TILEGX_OPC_MUL_HS_LU,
+  TILEGX_OPC_MUL_HU_HU,
+  TILEGX_OPC_MUL_HU_LS,
+  TILEGX_OPC_MUL_HU_LU,
+  TILEGX_OPC_MUL_LS_LS,
+  TILEGX_OPC_MUL_LS_LU,
+  TILEGX_OPC_MUL_LU_LU,
+  TILEGX_OPC_MULA_HS_HS,
+  TILEGX_OPC_MULA_HS_HU,
+  TILEGX_OPC_MULA_HS_LS,
+  TILEGX_OPC_MULA_HS_LU,
+  TILEGX_OPC_MULA_HU_HU,
+  TILEGX_OPC_MULA_HU_LS,
+  TILEGX_OPC_MULA_HU_LU,
+  TILEGX_OPC_MULA_LS_LS,
+  TILEGX_OPC_MULA_LS_LU,
+  TILEGX_OPC_MULA_LU_LU,
+  TILEGX_OPC_MULAX,
+  TILEGX_OPC_MULX,
+  TILEGX_OPC_MZ,
+  TILEGX_OPC_NAP,
+  TILEGX_OPC_NOP,
+  TILEGX_OPC_NOR,
+  TILEGX_OPC_OR,
+  TILEGX_OPC_ORI,
+  TILEGX_OPC_PCNT,
+  TILEGX_OPC_REVBITS,
+  TILEGX_OPC_REVBYTES,
+  TILEGX_OPC_ROTL,
+  TILEGX_OPC_ROTLI,
+  TILEGX_OPC_SHL,
+  TILEGX_OPC_SHL16INSLI,
+  TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADDX,
+  TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADDX,
+  TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADDX,
+  TILEGX_OPC_SHLI,
+  TILEGX_OPC_SHLX,
+  TILEGX_OPC_SHLXI,
+  TILEGX_OPC_SHRS,
+  TILEGX_OPC_SHRSI,
+  TILEGX_OPC_SHRU,
+  TILEGX_OPC_SHRUI,
+  TILEGX_OPC_SHRUX,
+  TILEGX_OPC_SHRUXI,
+  TILEGX_OPC_SHUFFLEBYTES,
+  TILEGX_OPC_ST,
+  TILEGX_OPC_ST1,
+  TILEGX_OPC_ST1_ADD,
+  TILEGX_OPC_ST2,
+  TILEGX_OPC_ST2_ADD,
+  TILEGX_OPC_ST4,
+  TILEGX_OPC_ST4_ADD,
+  TILEGX_OPC_ST_ADD,
+  TILEGX_OPC_STNT,
+  TILEGX_OPC_STNT1,
+  TILEGX_OPC_STNT1_ADD,
+  TILEGX_OPC_STNT2,
+  TILEGX_OPC_STNT2_ADD,
+  TILEGX_OPC_STNT4,
+  TILEGX_OPC_STNT4_ADD,
+  TILEGX_OPC_STNT_ADD,
+  TILEGX_OPC_SUB,
+  TILEGX_OPC_SUBX,
+  TILEGX_OPC_SUBXSC,
+  TILEGX_OPC_SWINT0,
+  TILEGX_OPC_SWINT1,
+  TILEGX_OPC_SWINT2,
+  TILEGX_OPC_SWINT3,
+  TILEGX_OPC_TBLIDXB0,
+  TILEGX_OPC_TBLIDXB1,
+  TILEGX_OPC_TBLIDXB2,
+  TILEGX_OPC_TBLIDXB3,
+  TILEGX_OPC_V1ADD,
+  TILEGX_OPC_V1ADDI,
+  TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADIFFU,
+  TILEGX_OPC_V1AVGU,
+  TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPEQI,
+  TILEGX_OPC_V1CMPLES,
+  TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTSI,
+  TILEGX_OPC_V1CMPLTU,
+  TILEGX_OPC_V1CMPLTUI,
+  TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1DDOTPU,
+  TILEGX_OPC_V1DDOTPUA,
+  TILEGX_OPC_V1DDOTPUS,
+  TILEGX_OPC_V1DDOTPUSA,
+  TILEGX_OPC_V1DOTP,
+  TILEGX_OPC_V1DOTPA,
+  TILEGX_OPC_V1DOTPU,
+  TILEGX_OPC_V1DOTPUA,
+  TILEGX_OPC_V1DOTPUS,
+  TILEGX_OPC_V1DOTPUSA,
+  TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_L,
+  TILEGX_OPC_V1MAXU,
+  TILEGX_OPC_V1MAXUI,
+  TILEGX_OPC_V1MINU,
+  TILEGX_OPC_V1MINUI,
+  TILEGX_OPC_V1MNZ,
+  TILEGX_OPC_V1MULTU,
+  TILEGX_OPC_V1MULU,
+  TILEGX_OPC_V1MULUS,
+  TILEGX_OPC_V1MZ,
+  TILEGX_OPC_V1SADAU,
+  TILEGX_OPC_V1SADU,
+  TILEGX_OPC_V1SHL,
+  TILEGX_OPC_V1SHLI,
+  TILEGX_OPC_V1SHRS,
+  TILEGX_OPC_V1SHRSI,
+  TILEGX_OPC_V1SHRU,
+  TILEGX_OPC_V1SHRUI,
+  TILEGX_OPC_V1SUB,
+  TILEGX_OPC_V1SUBUC,
+  TILEGX_OPC_V2ADD,
+  TILEGX_OPC_V2ADDI,
+  TILEGX_OPC_V2ADDSC,
+  TILEGX_OPC_V2ADIFFS,
+  TILEGX_OPC_V2AVGS,
+  TILEGX_OPC_V2CMPEQ,
+  TILEGX_OPC_V2CMPEQI,
+  TILEGX_OPC_V2CMPLES,
+  TILEGX_OPC_V2CMPLEU,
+  TILEGX_OPC_V2CMPLTS,
+  TILEGX_OPC_V2CMPLTSI,
+  TILEGX_OPC_V2CMPLTU,
+  TILEGX_OPC_V2CMPLTUI,
+  TILEGX_OPC_V2CMPNE,
+  TILEGX_OPC_V2DOTP,
+  TILEGX_OPC_V2DOTPA,
+  TILEGX_OPC_V2INT_H,
+  TILEGX_OPC_V2INT_L,
+  TILEGX_OPC_V2MAXS,
+  TILEGX_OPC_V2MAXSI,
+  TILEGX_OPC_V2MINS,
+  TILEGX_OPC_V2MINSI,
+  TILEGX_OPC_V2MNZ,
+  TILEGX_OPC_V2MULFSC,
+  TILEGX_OPC_V2MULS,
+  TILEGX_OPC_V2MULTS,
+  TILEGX_OPC_V2MZ,
+  TILEGX_OPC_V2PACKH,
+  TILEGX_OPC_V2PACKL,
+  TILEGX_OPC_V2PACKUC,
+  TILEGX_OPC_V2SADAS,
+  TILEGX_OPC_V2SADAU,
+  TILEGX_OPC_V2SADS,
+  TILEGX_OPC_V2SADU,
+  TILEGX_OPC_V2SHL,
+  TILEGX_OPC_V2SHLI,
+  TILEGX_OPC_V2SHLSC,
+  TILEGX_OPC_V2SHRS,
+  TILEGX_OPC_V2SHRSI,
+  TILEGX_OPC_V2SHRU,
+  TILEGX_OPC_V2SHRUI,
+  TILEGX_OPC_V2SUB,
+  TILEGX_OPC_V2SUBSC,
+  TILEGX_OPC_V4ADD,
+  TILEGX_OPC_V4ADDSC,
+  TILEGX_OPC_V4INT_H,
+  TILEGX_OPC_V4INT_L,
+  TILEGX_OPC_V4PACKSC,
+  TILEGX_OPC_V4SHL,
+  TILEGX_OPC_V4SHLSC,
+  TILEGX_OPC_V4SHRS,
+  TILEGX_OPC_V4SHRU,
+  TILEGX_OPC_V4SUB,
+  TILEGX_OPC_V4SUBSC,
+  TILEGX_OPC_WH64,
+  TILEGX_OPC_XOR,
+  TILEGX_OPC_XORI,
+  TILEGX_OPC_NONE
+} tilegx_mnemonic;
+
+enum
+{
+  TILEGX_MAX_OPERANDS = 4 /* bfexts takes the most operands: 4 */
+};
+
+struct tilegx_opcode
+{
+  /* The opcode mnemonic, e.g. "add" */
+  const char *name;
+
+  /* The enum value for this mnemonic. */
+  tilegx_mnemonic mnemonic;
+
+  /* A bit mask of which of the five pipes this instruction
+     is compatible with:
+     X0  0x01
+     X1  0x02
+     Y0  0x04
+     Y1  0x08
+     Y2  0x10 */
+  unsigned char pipes;
+
+  /* How many operands are there? */
+  unsigned char num_operands;
+
+  /* Which register does this write implicitly, or TREG_ZERO if none? */
+  unsigned char implicitly_written_register;
+
+  /* Can this instruction be bundled with others? (Almost always true.) */
+  unsigned char can_bundle;
+
+  /* The description of the operands. Each of these is an
+   * index into the tilegx_operands[] table. */
+  unsigned char operands[TILEGX_NUM_PIPELINE_ENCODINGS][TILEGX_MAX_OPERANDS];
+
+  /* A mask of which bits have predefined values for each pipeline.
+   * This is useful for disassembly. */
+  tilegx_bundle_bits fixed_bit_masks[TILEGX_NUM_PIPELINE_ENCODINGS];
+
+  /* For each bit set in fixed_bit_masks, what the value is for this
+   * instruction. */
+  tilegx_bundle_bits fixed_bit_values[TILEGX_NUM_PIPELINE_ENCODINGS];
+};
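+
+/* Illustrative sketch (not upstream code): a bundle's slot in pipeline
+   pipe matches an opcode when that pipe is legal for the opcode and the
+   bundle's predefined bits equal the opcode's fixed values.  Pipes an
+   opcode does not support use mask 0 and value -1, so they never match. */
+static __inline int
+example_opcode_matches(const struct tilegx_opcode *opc,
+                       tilegx_bundle_bits bundle, int pipe)
+{
+  return (opc->pipes & (1 << pipe)) != 0 &&
+         (bundle & opc->fixed_bit_masks[pipe]) == opc->fixed_bit_values[pipe];
+}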
+
+/* Used for non-textual disassembly into structs. */
+struct tilegx_decoded_instruction
+{
+  const struct tilegx_opcode *opcode;
+  const struct tilegx_operand *operands[TILEGX_MAX_OPERANDS];
+  long long operand_values[TILEGX_MAX_OPERANDS];
+};
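+
+/* For illustration: once a bundle has been decoded into this struct,
+   each operands[i]->type says how to interpret operand_values[i] (a
+   register number, an immediate, a PC-relative address or an SPR
+   number). */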
+
+enum
+{
+  ADDI_IMM8_OPCODE_X0 = 1,
+  ADDI_IMM8_OPCODE_X1 = 1,
+  ADDI_OPCODE_Y0 = 0,
+  ADDI_OPCODE_Y1 = 1,
+  ADDLI_OPCODE_X0 = 1,
+  ADDLI_OPCODE_X1 = 0,
+  ADDXI_IMM8_OPCODE_X0 = 2,
+  ADDXI_IMM8_OPCODE_X1 = 2,
+  ADDXI_OPCODE_Y0 = 1,
+  ADDXI_OPCODE_Y1 = 2,
+  ADDXLI_OPCODE_X0 = 2,
+  ADDXLI_OPCODE_X1 = 1,
+  ADDXSC_RRR_0_OPCODE_X0 = 1,
+  ADDXSC_RRR_0_OPCODE_X1 = 1,
+  ADDX_RRR_0_OPCODE_X0 = 2,
+  ADDX_RRR_0_OPCODE_X1 = 2,
+  ADDX_RRR_0_OPCODE_Y0 = 0,
+  ADDX_SPECIAL_0_OPCODE_Y1 = 0,
+  ADD_RRR_0_OPCODE_X0 = 3,
+  ADD_RRR_0_OPCODE_X1 = 3,
+  ADD_RRR_0_OPCODE_Y0 = 1,
+  ADD_SPECIAL_0_OPCODE_Y1 = 1,
+  ANDI_IMM8_OPCODE_X0 = 3,
+  ANDI_IMM8_OPCODE_X1 = 3,
+  ANDI_OPCODE_Y0 = 2,
+  ANDI_OPCODE_Y1 = 3,
+  AND_RRR_0_OPCODE_X0 = 4,
+  AND_RRR_0_OPCODE_X1 = 4,
+  AND_RRR_5_OPCODE_Y0 = 0,
+  AND_RRR_5_OPCODE_Y1 = 0,
+  BEQZT_BRANCH_OPCODE_X1 = 16,
+  BEQZ_BRANCH_OPCODE_X1 = 17,
+  BFEXTS_BF_OPCODE_X0 = 4,
+  BFEXTU_BF_OPCODE_X0 = 5,
+  BFINS_BF_OPCODE_X0 = 6,
+  BF_OPCODE_X0 = 3,
+  BGEZT_BRANCH_OPCODE_X1 = 18,
+  BGEZ_BRANCH_OPCODE_X1 = 19,
+  BGTZT_BRANCH_OPCODE_X1 = 20,
+  BGTZ_BRANCH_OPCODE_X1 = 21,
+  BLBCT_BRANCH_OPCODE_X1 = 22,
+  BLBC_BRANCH_OPCODE_X1 = 23,
+  BLBST_BRANCH_OPCODE_X1 = 24,
+  BLBS_BRANCH_OPCODE_X1 = 25,
+  BLEZT_BRANCH_OPCODE_X1 = 26,
+  BLEZ_BRANCH_OPCODE_X1 = 27,
+  BLTZT_BRANCH_OPCODE_X1 = 28,
+  BLTZ_BRANCH_OPCODE_X1 = 29,
+  BNEZT_BRANCH_OPCODE_X1 = 30,
+  BNEZ_BRANCH_OPCODE_X1 = 31,
+  BRANCH_OPCODE_X1 = 2,
+  CMOVEQZ_RRR_0_OPCODE_X0 = 5,
+  CMOVEQZ_RRR_4_OPCODE_Y0 = 0,
+  CMOVNEZ_RRR_0_OPCODE_X0 = 6,
+  CMOVNEZ_RRR_4_OPCODE_Y0 = 1,
+  CMPEQI_IMM8_OPCODE_X0 = 4,
+  CMPEQI_IMM8_OPCODE_X1 = 4,
+  CMPEQI_OPCODE_Y0 = 3,
+  CMPEQI_OPCODE_Y1 = 4,
+  CMPEQ_RRR_0_OPCODE_X0 = 7,
+  CMPEQ_RRR_0_OPCODE_X1 = 5,
+  CMPEQ_RRR_3_OPCODE_Y0 = 0,
+  CMPEQ_RRR_3_OPCODE_Y1 = 2,
+  CMPEXCH4_RRR_0_OPCODE_X1 = 6,
+  CMPEXCH_RRR_0_OPCODE_X1 = 7,
+  CMPLES_RRR_0_OPCODE_X0 = 8,
+  CMPLES_RRR_0_OPCODE_X1 = 8,
+  CMPLES_RRR_2_OPCODE_Y0 = 0,
+  CMPLES_RRR_2_OPCODE_Y1 = 0,
+  CMPLEU_RRR_0_OPCODE_X0 = 9,
+  CMPLEU_RRR_0_OPCODE_X1 = 9,
+  CMPLEU_RRR_2_OPCODE_Y0 = 1,
+  CMPLEU_RRR_2_OPCODE_Y1 = 1,
+  CMPLTSI_IMM8_OPCODE_X0 = 5,
+  CMPLTSI_IMM8_OPCODE_X1 = 5,
+  CMPLTSI_OPCODE_Y0 = 4,
+  CMPLTSI_OPCODE_Y1 = 5,
+  CMPLTS_RRR_0_OPCODE_X0 = 10,
+  CMPLTS_RRR_0_OPCODE_X1 = 10,
+  CMPLTS_RRR_2_OPCODE_Y0 = 2,
+  CMPLTS_RRR_2_OPCODE_Y1 = 2,
+  CMPLTUI_IMM8_OPCODE_X0 = 6,
+  CMPLTUI_IMM8_OPCODE_X1 = 6,
+  CMPLTU_RRR_0_OPCODE_X0 = 11,
+  CMPLTU_RRR_0_OPCODE_X1 = 11,
+  CMPLTU_RRR_2_OPCODE_Y0 = 3,
+  CMPLTU_RRR_2_OPCODE_Y1 = 3,
+  CMPNE_RRR_0_OPCODE_X0 = 12,
+  CMPNE_RRR_0_OPCODE_X1 = 12,
+  CMPNE_RRR_3_OPCODE_Y0 = 1,
+  CMPNE_RRR_3_OPCODE_Y1 = 3,
+  CMULAF_RRR_0_OPCODE_X0 = 13,
+  CMULA_RRR_0_OPCODE_X0 = 14,
+  CMULFR_RRR_0_OPCODE_X0 = 15,
+  CMULF_RRR_0_OPCODE_X0 = 16,
+  CMULHR_RRR_0_OPCODE_X0 = 17,
+  CMULH_RRR_0_OPCODE_X0 = 18,
+  CMUL_RRR_0_OPCODE_X0 = 19,
+  CNTLZ_UNARY_OPCODE_X0 = 1,
+  CNTLZ_UNARY_OPCODE_Y0 = 1,
+  CNTTZ_UNARY_OPCODE_X0 = 2,
+  CNTTZ_UNARY_OPCODE_Y0 = 2,
+  CRC32_32_RRR_0_OPCODE_X0 = 20,
+  CRC32_8_RRR_0_OPCODE_X0 = 21,
+  DBLALIGN2_RRR_0_OPCODE_X0 = 22,
+  DBLALIGN2_RRR_0_OPCODE_X1 = 13,
+  DBLALIGN4_RRR_0_OPCODE_X0 = 23,
+  DBLALIGN4_RRR_0_OPCODE_X1 = 14,
+  DBLALIGN6_RRR_0_OPCODE_X0 = 24,
+  DBLALIGN6_RRR_0_OPCODE_X1 = 15,
+  DBLALIGN_RRR_0_OPCODE_X0 = 25,
+  DRAIN_UNARY_OPCODE_X1 = 1,
+  DTLBPR_UNARY_OPCODE_X1 = 2,
+  EXCH4_RRR_0_OPCODE_X1 = 16,
+  EXCH_RRR_0_OPCODE_X1 = 17,
+  FDOUBLE_ADDSUB_RRR_0_OPCODE_X0 = 26,
+  FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0 = 27,
+  FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0 = 28,
+  FDOUBLE_PACK1_RRR_0_OPCODE_X0 = 29,
+  FDOUBLE_PACK2_RRR_0_OPCODE_X0 = 30,
+  FDOUBLE_SUB_FLAGS_RRR_0_OPCODE_X0 = 31,
+  FDOUBLE_UNPACK_MAX_RRR_0_OPCODE_X0 = 32,
+  FDOUBLE_UNPACK_MIN_RRR_0_OPCODE_X0 = 33,
+  FETCHADD4_RRR_0_OPCODE_X1 = 18,
+  FETCHADDGEZ4_RRR_0_OPCODE_X1 = 19,
+  FETCHADDGEZ_RRR_0_OPCODE_X1 = 20,
+  FETCHADD_RRR_0_OPCODE_X1 = 21,
+  FETCHAND4_RRR_0_OPCODE_X1 = 22,
+  FETCHAND_RRR_0_OPCODE_X1 = 23,
+  FETCHOR4_RRR_0_OPCODE_X1 = 24,
+  FETCHOR_RRR_0_OPCODE_X1 = 25,
+  FINV_UNARY_OPCODE_X1 = 3,
+  FLUSHWB_UNARY_OPCODE_X1 = 4,
+  FLUSH_UNARY_OPCODE_X1 = 5,
+  FNOP_UNARY_OPCODE_X0 = 3,
+  FNOP_UNARY_OPCODE_X1 = 6,
+  FNOP_UNARY_OPCODE_Y0 = 3,
+  FNOP_UNARY_OPCODE_Y1 = 8,
+  FSINGLE_ADD1_RRR_0_OPCODE_X0 = 34,
+  FSINGLE_ADDSUB2_RRR_0_OPCODE_X0 = 35,
+  FSINGLE_MUL1_RRR_0_OPCODE_X0 = 36,
+  FSINGLE_MUL2_RRR_0_OPCODE_X0 = 37,
+  FSINGLE_PACK1_UNARY_OPCODE_X0 = 4,
+  FSINGLE_PACK1_UNARY_OPCODE_Y0 = 4,
+  FSINGLE_PACK2_RRR_0_OPCODE_X0 = 38,
+  FSINGLE_SUB1_RRR_0_OPCODE_X0 = 39,
+  ICOH_UNARY_OPCODE_X1 = 7,
+  ILL_UNARY_OPCODE_X1 = 8,
+  ILL_UNARY_OPCODE_Y1 = 9,
+  IMM8_OPCODE_X0 = 4,
+  IMM8_OPCODE_X1 = 3,
+  INV_UNARY_OPCODE_X1 = 9,
+  IRET_UNARY_OPCODE_X1 = 10,
+  JALRP_UNARY_OPCODE_X1 = 11,
+  JALRP_UNARY_OPCODE_Y1 = 10,
+  JALR_UNARY_OPCODE_X1 = 12,
+  JALR_UNARY_OPCODE_Y1 = 11,
+  JAL_JUMP_OPCODE_X1 = 0,
+  JRP_UNARY_OPCODE_X1 = 13,
+  JRP_UNARY_OPCODE_Y1 = 12,
+  JR_UNARY_OPCODE_X1 = 14,
+  JR_UNARY_OPCODE_Y1 = 13,
+  JUMP_OPCODE_X1 = 4,
+  J_JUMP_OPCODE_X1 = 1,
+  LD1S_ADD_IMM8_OPCODE_X1 = 7,
+  LD1S_OPCODE_Y2 = 0,
+  LD1S_UNARY_OPCODE_X1 = 15,
+  LD1U_ADD_IMM8_OPCODE_X1 = 8,
+  LD1U_OPCODE_Y2 = 1,
+  LD1U_UNARY_OPCODE_X1 = 16,
+  LD2S_ADD_IMM8_OPCODE_X1 = 9,
+  LD2S_OPCODE_Y2 = 2,
+  LD2S_UNARY_OPCODE_X1 = 17,
+  LD2U_ADD_IMM8_OPCODE_X1 = 10,
+  LD2U_OPCODE_Y2 = 3,
+  LD2U_UNARY_OPCODE_X1 = 18,
+  LD4S_ADD_IMM8_OPCODE_X1 = 11,
+  LD4S_OPCODE_Y2 = 1,
+  LD4S_UNARY_OPCODE_X1 = 19,
+  LD4U_ADD_IMM8_OPCODE_X1 = 12,
+  LD4U_OPCODE_Y2 = 2,
+  LD4U_UNARY_OPCODE_X1 = 20,
+  LDNA_UNARY_OPCODE_X1 = 21,
+  LDNT1S_ADD_IMM8_OPCODE_X1 = 13,
+  LDNT1S_UNARY_OPCODE_X1 = 22,
+  LDNT1U_ADD_IMM8_OPCODE_X1 = 14,
+  LDNT1U_UNARY_OPCODE_X1 = 23,
+  LDNT2S_ADD_IMM8_OPCODE_X1 = 15,
+  LDNT2S_UNARY_OPCODE_X1 = 24,
+  LDNT2U_ADD_IMM8_OPCODE_X1 = 16,
+  LDNT2U_UNARY_OPCODE_X1 = 25,
+  LDNT4S_ADD_IMM8_OPCODE_X1 = 17,
+  LDNT4S_UNARY_OPCODE_X1 = 26,
+  LDNT4U_ADD_IMM8_OPCODE_X1 = 18,
+  LDNT4U_UNARY_OPCODE_X1 = 27,
+  LDNT_ADD_IMM8_OPCODE_X1 = 19,
+  LDNT_UNARY_OPCODE_X1 = 28,
+  LD_ADD_IMM8_OPCODE_X1 = 20,
+  LD_OPCODE_Y2 = 3,
+  LD_UNARY_OPCODE_X1 = 29,
+  LNK_UNARY_OPCODE_X1 = 30,
+  LNK_UNARY_OPCODE_Y1 = 14,
+  LWNA_ADD_IMM8_OPCODE_X1 = 21,
+  MFSPR_IMM8_OPCODE_X1 = 22,
+  MF_UNARY_OPCODE_X1 = 31,
+  MM_BF_OPCODE_X0 = 7,
+  MNZ_RRR_0_OPCODE_X0 = 40,
+  MNZ_RRR_0_OPCODE_X1 = 26,
+  MNZ_RRR_4_OPCODE_Y0 = 2,
+  MNZ_RRR_4_OPCODE_Y1 = 2,
+  MODE_OPCODE_YA2 = 1,
+  MODE_OPCODE_YB2 = 2,
+  MODE_OPCODE_YC2 = 3,
+  MTSPR_IMM8_OPCODE_X1 = 23,
+  MULAX_RRR_0_OPCODE_X0 = 41,
+  MULAX_RRR_3_OPCODE_Y0 = 2,
+  MULA_HS_HS_RRR_0_OPCODE_X0 = 42,
+  MULA_HS_HS_RRR_9_OPCODE_Y0 = 0,
+  MULA_HS_HU_RRR_0_OPCODE_X0 = 43,
+  MULA_HS_LS_RRR_0_OPCODE_X0 = 44,
+  MULA_HS_LU_RRR_0_OPCODE_X0 = 45,
+  MULA_HU_HU_RRR_0_OPCODE_X0 = 46,
+  MULA_HU_HU_RRR_9_OPCODE_Y0 = 1,
+  MULA_HU_LS_RRR_0_OPCODE_X0 = 47,
+  MULA_HU_LU_RRR_0_OPCODE_X0 = 48,
+  MULA_LS_LS_RRR_0_OPCODE_X0 = 49,
+  MULA_LS_LS_RRR_9_OPCODE_Y0 = 2,
+  MULA_LS_LU_RRR_0_OPCODE_X0 = 50,
+  MULA_LU_LU_RRR_0_OPCODE_X0 = 51,
+  MULA_LU_LU_RRR_9_OPCODE_Y0 = 3,
+  MULX_RRR_0_OPCODE_X0 = 52,
+  MULX_RRR_3_OPCODE_Y0 = 3,
+  MUL_HS_HS_RRR_0_OPCODE_X0 = 53,
+  MUL_HS_HS_RRR_8_OPCODE_Y0 = 0,
+  MUL_HS_HU_RRR_0_OPCODE_X0 = 54,
+  MUL_HS_LS_RRR_0_OPCODE_X0 = 55,
+  MUL_HS_LU_RRR_0_OPCODE_X0 = 56,
+  MUL_HU_HU_RRR_0_OPCODE_X0 = 57,
+  MUL_HU_HU_RRR_8_OPCODE_Y0 = 1,
+  MUL_HU_LS_RRR_0_OPCODE_X0 = 58,
+  MUL_HU_LU_RRR_0_OPCODE_X0 = 59,
+  MUL_LS_LS_RRR_0_OPCODE_X0 = 60,
+  MUL_LS_LS_RRR_8_OPCODE_Y0 = 2,
+  MUL_LS_LU_RRR_0_OPCODE_X0 = 61,
+  MUL_LU_LU_RRR_0_OPCODE_X0 = 62,
+  MUL_LU_LU_RRR_8_OPCODE_Y0 = 3,
+  MZ_RRR_0_OPCODE_X0 = 63,
+  MZ_RRR_0_OPCODE_X1 = 27,
+  MZ_RRR_4_OPCODE_Y0 = 3,
+  MZ_RRR_4_OPCODE_Y1 = 3,
+  NAP_UNARY_OPCODE_X1 = 32,
+  NOP_UNARY_OPCODE_X0 = 5,
+  NOP_UNARY_OPCODE_X1 = 33,
+  NOP_UNARY_OPCODE_Y0 = 5,
+  NOP_UNARY_OPCODE_Y1 = 15,
+  NOR_RRR_0_OPCODE_X0 = 64,
+  NOR_RRR_0_OPCODE_X1 = 28,
+  NOR_RRR_5_OPCODE_Y0 = 1,
+  NOR_RRR_5_OPCODE_Y1 = 1,
+  ORI_IMM8_OPCODE_X0 = 7,
+  ORI_IMM8_OPCODE_X1 = 24,
+  OR_RRR_0_OPCODE_X0 = 65,
+  OR_RRR_0_OPCODE_X1 = 29,
+  OR_RRR_5_OPCODE_Y0 = 2,
+  OR_RRR_5_OPCODE_Y1 = 2,
+  PCNT_UNARY_OPCODE_X0 = 6,
+  PCNT_UNARY_OPCODE_Y0 = 6,
+  REVBITS_UNARY_OPCODE_X0 = 7,
+  REVBITS_UNARY_OPCODE_Y0 = 7,
+  REVBYTES_UNARY_OPCODE_X0 = 8,
+  REVBYTES_UNARY_OPCODE_Y0 = 8,
+  ROTLI_SHIFT_OPCODE_X0 = 1,
+  ROTLI_SHIFT_OPCODE_X1 = 1,
+  ROTLI_SHIFT_OPCODE_Y0 = 0,
+  ROTLI_SHIFT_OPCODE_Y1 = 0,
+  ROTL_RRR_0_OPCODE_X0 = 66,
+  ROTL_RRR_0_OPCODE_X1 = 30,
+  ROTL_RRR_6_OPCODE_Y0 = 0,
+  ROTL_RRR_6_OPCODE_Y1 = 0,
+  RRR_0_OPCODE_X0 = 5,
+  RRR_0_OPCODE_X1 = 5,
+  RRR_0_OPCODE_Y0 = 5,
+  RRR_0_OPCODE_Y1 = 6,
+  RRR_1_OPCODE_Y0 = 6,
+  RRR_1_OPCODE_Y1 = 7,
+  RRR_2_OPCODE_Y0 = 7,
+  RRR_2_OPCODE_Y1 = 8,
+  RRR_3_OPCODE_Y0 = 8,
+  RRR_3_OPCODE_Y1 = 9,
+  RRR_4_OPCODE_Y0 = 9,
+  RRR_4_OPCODE_Y1 = 10,
+  RRR_5_OPCODE_Y0 = 10,
+  RRR_5_OPCODE_Y1 = 11,
+  RRR_6_OPCODE_Y0 = 11,
+  RRR_6_OPCODE_Y1 = 12,
+  RRR_7_OPCODE_Y0 = 12,
+  RRR_7_OPCODE_Y1 = 13,
+  RRR_8_OPCODE_Y0 = 13,
+  RRR_9_OPCODE_Y0 = 14,
+  SHIFT_OPCODE_X0 = 6,
+  SHIFT_OPCODE_X1 = 6,
+  SHIFT_OPCODE_Y0 = 15,
+  SHIFT_OPCODE_Y1 = 14,
+  SHL16INSLI_OPCODE_X0 = 7,
+  SHL16INSLI_OPCODE_X1 = 7,
+  SHL1ADDX_RRR_0_OPCODE_X0 = 67,
+  SHL1ADDX_RRR_0_OPCODE_X1 = 31,
+  SHL1ADDX_RRR_7_OPCODE_Y0 = 1,
+  SHL1ADDX_RRR_7_OPCODE_Y1 = 1,
+  SHL1ADD_RRR_0_OPCODE_X0 = 68,
+  SHL1ADD_RRR_0_OPCODE_X1 = 32,
+  SHL1ADD_RRR_1_OPCODE_Y0 = 0,
+  SHL1ADD_RRR_1_OPCODE_Y1 = 0,
+  SHL2ADDX_RRR_0_OPCODE_X0 = 69,
+  SHL2ADDX_RRR_0_OPCODE_X1 = 33,
+  SHL2ADDX_RRR_7_OPCODE_Y0 = 2,
+  SHL2ADDX_RRR_7_OPCODE_Y1 = 2,
+  SHL2ADD_RRR_0_OPCODE_X0 = 70,
+  SHL2ADD_RRR_0_OPCODE_X1 = 34,
+  SHL2ADD_RRR_1_OPCODE_Y0 = 1,
+  SHL2ADD_RRR_1_OPCODE_Y1 = 1,
+  SHL3ADDX_RRR_0_OPCODE_X0 = 71,
+  SHL3ADDX_RRR_0_OPCODE_X1 = 35,
+  SHL3ADDX_RRR_7_OPCODE_Y0 = 3,
+  SHL3ADDX_RRR_7_OPCODE_Y1 = 3,
+  SHL3ADD_RRR_0_OPCODE_X0 = 72,
+  SHL3ADD_RRR_0_OPCODE_X1 = 36,
+  SHL3ADD_RRR_1_OPCODE_Y0 = 2,
+  SHL3ADD_RRR_1_OPCODE_Y1 = 2,
+  SHLI_SHIFT_OPCODE_X0 = 2,
+  SHLI_SHIFT_OPCODE_X1 = 2,
+  SHLI_SHIFT_OPCODE_Y0 = 1,
+  SHLI_SHIFT_OPCODE_Y1 = 1,
+  SHLXI_SHIFT_OPCODE_X0 = 3,
+  SHLXI_SHIFT_OPCODE_X1 = 3,
+  SHLX_RRR_0_OPCODE_X0 = 73,
+  SHLX_RRR_0_OPCODE_X1 = 37,
+  SHL_RRR_0_OPCODE_X0 = 74,
+  SHL_RRR_0_OPCODE_X1 = 38,
+  SHL_RRR_6_OPCODE_Y0 = 1,
+  SHL_RRR_6_OPCODE_Y1 = 1,
+  SHRSI_SHIFT_OPCODE_X0 = 4,
+  SHRSI_SHIFT_OPCODE_X1 = 4,
+  SHRSI_SHIFT_OPCODE_Y0 = 2,
+  SHRSI_SHIFT_OPCODE_Y1 = 2,
+  SHRS_RRR_0_OPCODE_X0 = 75,
+  SHRS_RRR_0_OPCODE_X1 = 39,
+  SHRS_RRR_6_OPCODE_Y0 = 2,
+  SHRS_RRR_6_OPCODE_Y1 = 2,
+  SHRUI_SHIFT_OPCODE_X0 = 5,
+  SHRUI_SHIFT_OPCODE_X1 = 5,
+  SHRUI_SHIFT_OPCODE_Y0 = 3,
+  SHRUI_SHIFT_OPCODE_Y1 = 3,
+  SHRUXI_SHIFT_OPCODE_X0 = 6,
+  SHRUXI_SHIFT_OPCODE_X1 = 6,
+  SHRUX_RRR_0_OPCODE_X0 = 76,
+  SHRUX_RRR_0_OPCODE_X1 = 40,
+  SHRU_RRR_0_OPCODE_X0 = 77,
+  SHRU_RRR_0_OPCODE_X1 = 41,
+  SHRU_RRR_6_OPCODE_Y0 = 3,
+  SHRU_RRR_6_OPCODE_Y1 = 3,
+  SHUFFLEBYTES_RRR_0_OPCODE_X0 = 78,
+  ST1_ADD_IMM8_OPCODE_X1 = 25,
+  ST1_OPCODE_Y2 = 0,
+  ST1_RRR_0_OPCODE_X1 = 42,
+  ST2_ADD_IMM8_OPCODE_X1 = 26,
+  ST2_OPCODE_Y2 = 1,
+  ST2_RRR_0_OPCODE_X1 = 43,
+  ST4_ADD_IMM8_OPCODE_X1 = 27,
+  ST4_OPCODE_Y2 = 2,
+  ST4_RRR_0_OPCODE_X1 = 44,
+  STNT1_ADD_IMM8_OPCODE_X1 = 28,
+  STNT1_RRR_0_OPCODE_X1 = 45,
+  STNT2_ADD_IMM8_OPCODE_X1 = 29,
+  STNT2_RRR_0_OPCODE_X1 = 46,
+  STNT4_ADD_IMM8_OPCODE_X1 = 30,
+  STNT4_RRR_0_OPCODE_X1 = 47,
+  STNT_ADD_IMM8_OPCODE_X1 = 31,
+  STNT_RRR_0_OPCODE_X1 = 48,
+  ST_ADD_IMM8_OPCODE_X1 = 32,
+  ST_OPCODE_Y2 = 3,
+  ST_RRR_0_OPCODE_X1 = 49,
+  SUBXSC_RRR_0_OPCODE_X0 = 79,
+  SUBXSC_RRR_0_OPCODE_X1 = 50,
+  SUBX_RRR_0_OPCODE_X0 = 80,
+  SUBX_RRR_0_OPCODE_X1 = 51,
+  SUBX_RRR_0_OPCODE_Y0 = 2,
+  SUBX_RRR_0_OPCODE_Y1 = 2,
+  SUB_RRR_0_OPCODE_X0 = 81,
+  SUB_RRR_0_OPCODE_X1 = 52,
+  SUB_RRR_0_OPCODE_Y0 = 3,
+  SUB_RRR_0_OPCODE_Y1 = 3,
+  SWINT0_UNARY_OPCODE_X1 = 34,
+  SWINT1_UNARY_OPCODE_X1 = 35,
+  SWINT2_UNARY_OPCODE_X1 = 36,
+  SWINT3_UNARY_OPCODE_X1 = 37,
+  TBLIDXB0_UNARY_OPCODE_X0 = 9,
+  TBLIDXB0_UNARY_OPCODE_Y0 = 9,
+  TBLIDXB1_UNARY_OPCODE_X0 = 10,
+  TBLIDXB1_UNARY_OPCODE_Y0 = 10,
+  TBLIDXB2_UNARY_OPCODE_X0 = 11,
+  TBLIDXB2_UNARY_OPCODE_Y0 = 11,
+  TBLIDXB3_UNARY_OPCODE_X0 = 12,
+  TBLIDXB3_UNARY_OPCODE_Y0 = 12,
+  UNARY_RRR_0_OPCODE_X0 = 82,
+  UNARY_RRR_0_OPCODE_X1 = 53,
+  UNARY_RRR_1_OPCODE_Y0 = 3,
+  UNARY_RRR_1_OPCODE_Y1 = 3,
+  V1ADDI_IMM8_OPCODE_X0 = 8,
+  V1ADDI_IMM8_OPCODE_X1 = 33,
+  V1ADDUC_RRR_0_OPCODE_X0 = 83,
+  V1ADDUC_RRR_0_OPCODE_X1 = 54,
+  V1ADD_RRR_0_OPCODE_X0 = 84,
+  V1ADD_RRR_0_OPCODE_X1 = 55,
+  V1ADIFFU_RRR_0_OPCODE_X0 = 85,
+  V1AVGU_RRR_0_OPCODE_X0 = 86,
+  V1CMPEQI_IMM8_OPCODE_X0 = 9,
+  V1CMPEQI_IMM8_OPCODE_X1 = 34,
+  V1CMPEQ_RRR_0_OPCODE_X0 = 87,
+  V1CMPEQ_RRR_0_OPCODE_X1 = 56,
+  V1CMPLES_RRR_0_OPCODE_X0 = 88,
+  V1CMPLES_RRR_0_OPCODE_X1 = 57,
+  V1CMPLEU_RRR_0_OPCODE_X0 = 89,
+  V1CMPLEU_RRR_0_OPCODE_X1 = 58,
+  V1CMPLTSI_IMM8_OPCODE_X0 = 10,
+  V1CMPLTSI_IMM8_OPCODE_X1 = 35,
+  V1CMPLTS_RRR_0_OPCODE_X0 = 90,
+  V1CMPLTS_RRR_0_OPCODE_X1 = 59,
+  V1CMPLTUI_IMM8_OPCODE_X0 = 11,
+  V1CMPLTUI_IMM8_OPCODE_X1 = 36,
+  V1CMPLTU_RRR_0_OPCODE_X0 = 91,
+  V1CMPLTU_RRR_0_OPCODE_X1 = 60,
+  V1CMPNE_RRR_0_OPCODE_X0 = 92,
+  V1CMPNE_RRR_0_OPCODE_X1 = 61,
+  V1DDOTPUA_RRR_0_OPCODE_X0 = 161,
+  V1DDOTPUSA_RRR_0_OPCODE_X0 = 93,
+  V1DDOTPUS_RRR_0_OPCODE_X0 = 94,
+  V1DDOTPU_RRR_0_OPCODE_X0 = 162,
+  V1DOTPA_RRR_0_OPCODE_X0 = 95,
+  V1DOTPUA_RRR_0_OPCODE_X0 = 163,
+  V1DOTPUSA_RRR_0_OPCODE_X0 = 96,
+  V1DOTPUS_RRR_0_OPCODE_X0 = 97,
+  V1DOTPU_RRR_0_OPCODE_X0 = 164,
+  V1DOTP_RRR_0_OPCODE_X0 = 98,
+  V1INT_H_RRR_0_OPCODE_X0 = 99,
+  V1INT_H_RRR_0_OPCODE_X1 = 62,
+  V1INT_L_RRR_0_OPCODE_X0 = 100,
+  V1INT_L_RRR_0_OPCODE_X1 = 63,
+  V1MAXUI_IMM8_OPCODE_X0 = 12,
+  V1MAXUI_IMM8_OPCODE_X1 = 37,
+  V1MAXU_RRR_0_OPCODE_X0 = 101,
+  V1MAXU_RRR_0_OPCODE_X1 = 64,
+  V1MINUI_IMM8_OPCODE_X0 = 13,
+  V1MINUI_IMM8_OPCODE_X1 = 38,
+  V1MINU_RRR_0_OPCODE_X0 = 102,
+  V1MINU_RRR_0_OPCODE_X1 = 65,
+  V1MNZ_RRR_0_OPCODE_X0 = 103,
+  V1MNZ_RRR_0_OPCODE_X1 = 66,
+  V1MULTU_RRR_0_OPCODE_X0 = 104,
+  V1MULUS_RRR_0_OPCODE_X0 = 105,
+  V1MULU_RRR_0_OPCODE_X0 = 106,
+  V1MZ_RRR_0_OPCODE_X0 = 107,
+  V1MZ_RRR_0_OPCODE_X1 = 67,
+  V1SADAU_RRR_0_OPCODE_X0 = 108,
+  V1SADU_RRR_0_OPCODE_X0 = 109,
+  V1SHLI_SHIFT_OPCODE_X0 = 7,
+  V1SHLI_SHIFT_OPCODE_X1 = 7,
+  V1SHL_RRR_0_OPCODE_X0 = 110,
+  V1SHL_RRR_0_OPCODE_X1 = 68,
+  V1SHRSI_SHIFT_OPCODE_X0 = 8,
+  V1SHRSI_SHIFT_OPCODE_X1 = 8,
+  V1SHRS_RRR_0_OPCODE_X0 = 111,
+  V1SHRS_RRR_0_OPCODE_X1 = 69,
+  V1SHRUI_SHIFT_OPCODE_X0 = 9,
+  V1SHRUI_SHIFT_OPCODE_X1 = 9,
+  V1SHRU_RRR_0_OPCODE_X0 = 112,
+  V1SHRU_RRR_0_OPCODE_X1 = 70,
+  V1SUBUC_RRR_0_OPCODE_X0 = 113,
+  V1SUBUC_RRR_0_OPCODE_X1 = 71,
+  V1SUB_RRR_0_OPCODE_X0 = 114,
+  V1SUB_RRR_0_OPCODE_X1 = 72,
+  V2ADDI_IMM8_OPCODE_X0 = 14,
+  V2ADDI_IMM8_OPCODE_X1 = 39,
+  V2ADDSC_RRR_0_OPCODE_X0 = 115,
+  V2ADDSC_RRR_0_OPCODE_X1 = 73,
+  V2ADD_RRR_0_OPCODE_X0 = 116,
+  V2ADD_RRR_0_OPCODE_X1 = 74,
+  V2ADIFFS_RRR_0_OPCODE_X0 = 117,
+  V2AVGS_RRR_0_OPCODE_X0 = 118,
+  V2CMPEQI_IMM8_OPCODE_X0 = 15,
+  V2CMPEQI_IMM8_OPCODE_X1 = 40,
+  V2CMPEQ_RRR_0_OPCODE_X0 = 119,
+  V2CMPEQ_RRR_0_OPCODE_X1 = 75,
+  V2CMPLES_RRR_0_OPCODE_X0 = 120,
+  V2CMPLES_RRR_0_OPCODE_X1 = 76,
+  V2CMPLEU_RRR_0_OPCODE_X0 = 121,
+  V2CMPLEU_RRR_0_OPCODE_X1 = 77,
+  V2CMPLTSI_IMM8_OPCODE_X0 = 16,
+  V2CMPLTSI_IMM8_OPCODE_X1 = 41,
+  V2CMPLTS_RRR_0_OPCODE_X0 = 122,
+  V2CMPLTS_RRR_0_OPCODE_X1 = 78,
+  V2CMPLTUI_IMM8_OPCODE_X0 = 17,
+  V2CMPLTUI_IMM8_OPCODE_X1 = 42,
+  V2CMPLTU_RRR_0_OPCODE_X0 = 123,
+  V2CMPLTU_RRR_0_OPCODE_X1 = 79,
+  V2CMPNE_RRR_0_OPCODE_X0 = 124,
+  V2CMPNE_RRR_0_OPCODE_X1 = 80,
+  V2DOTPA_RRR_0_OPCODE_X0 = 125,
+  V2DOTP_RRR_0_OPCODE_X0 = 126,
+  V2INT_H_RRR_0_OPCODE_X0 = 127,
+  V2INT_H_RRR_0_OPCODE_X1 = 81,
+  V2INT_L_RRR_0_OPCODE_X0 = 128,
+  V2INT_L_RRR_0_OPCODE_X1 = 82,
+  V2MAXSI_IMM8_OPCODE_X0 = 18,
+  V2MAXSI_IMM8_OPCODE_X1 = 43,
+  V2MAXS_RRR_0_OPCODE_X0 = 129,
+  V2MAXS_RRR_0_OPCODE_X1 = 83,
+  V2MINSI_IMM8_OPCODE_X0 = 19,
+  V2MINSI_IMM8_OPCODE_X1 = 44,
+  V2MINS_RRR_0_OPCODE_X0 = 130,
+  V2MINS_RRR_0_OPCODE_X1 = 84,
+  V2MNZ_RRR_0_OPCODE_X0 = 131,
+  V2MNZ_RRR_0_OPCODE_X1 = 85,
+  V2MULFSC_RRR_0_OPCODE_X0 = 132,
+  V2MULS_RRR_0_OPCODE_X0 = 133,
+  V2MULTS_RRR_0_OPCODE_X0 = 134,
+  V2MZ_RRR_0_OPCODE_X0 = 135,
+  V2MZ_RRR_0_OPCODE_X1 = 86,
+  V2PACKH_RRR_0_OPCODE_X0 = 136,
+  V2PACKH_RRR_0_OPCODE_X1 = 87,
+  V2PACKL_RRR_0_OPCODE_X0 = 137,
+  V2PACKL_RRR_0_OPCODE_X1 = 88,
+  V2PACKUC_RRR_0_OPCODE_X0 = 138,
+  V2PACKUC_RRR_0_OPCODE_X1 = 89,
+  V2SADAS_RRR_0_OPCODE_X0 = 139,
+  V2SADAU_RRR_0_OPCODE_X0 = 140,
+  V2SADS_RRR_0_OPCODE_X0 = 141,
+  V2SADU_RRR_0_OPCODE_X0 = 142,
+  V2SHLI_SHIFT_OPCODE_X0 = 10,
+  V2SHLI_SHIFT_OPCODE_X1 = 10,
+  V2SHLSC_RRR_0_OPCODE_X0 = 143,
+  V2SHLSC_RRR_0_OPCODE_X1 = 90,
+  V2SHL_RRR_0_OPCODE_X0 = 144,
+  V2SHL_RRR_0_OPCODE_X1 = 91,
+  V2SHRSI_SHIFT_OPCODE_X0 = 11,
+  V2SHRSI_SHIFT_OPCODE_X1 = 11,
+  V2SHRS_RRR_0_OPCODE_X0 = 145,
+  V2SHRS_RRR_0_OPCODE_X1 = 92,
+  V2SHRUI_SHIFT_OPCODE_X0 = 12,
+  V2SHRUI_SHIFT_OPCODE_X1 = 12,
+  V2SHRU_RRR_0_OPCODE_X0 = 146,
+  V2SHRU_RRR_0_OPCODE_X1 = 93,
+  V2SUBSC_RRR_0_OPCODE_X0 = 147,
+  V2SUBSC_RRR_0_OPCODE_X1 = 94,
+  V2SUB_RRR_0_OPCODE_X0 = 148,
+  V2SUB_RRR_0_OPCODE_X1 = 95,
+  V4ADDSC_RRR_0_OPCODE_X0 = 149,
+  V4ADDSC_RRR_0_OPCODE_X1 = 96,
+  V4ADD_RRR_0_OPCODE_X0 = 150,
+  V4ADD_RRR_0_OPCODE_X1 = 97,
+  V4INT_H_RRR_0_OPCODE_X0 = 151,
+  V4INT_H_RRR_0_OPCODE_X1 = 98,
+  V4INT_L_RRR_0_OPCODE_X0 = 152,
+  V4INT_L_RRR_0_OPCODE_X1 = 99,
+  V4PACKSC_RRR_0_OPCODE_X0 = 153,
+  V4PACKSC_RRR_0_OPCODE_X1 = 100,
+  V4SHLSC_RRR_0_OPCODE_X0 = 154,
+  V4SHLSC_RRR_0_OPCODE_X1 = 101,
+  V4SHL_RRR_0_OPCODE_X0 = 155,
+  V4SHL_RRR_0_OPCODE_X1 = 102,
+  V4SHRS_RRR_0_OPCODE_X0 = 156,
+  V4SHRS_RRR_0_OPCODE_X1 = 103,
+  V4SHRU_RRR_0_OPCODE_X0 = 157,
+  V4SHRU_RRR_0_OPCODE_X1 = 104,
+  V4SUBSC_RRR_0_OPCODE_X0 = 158,
+  V4SUBSC_RRR_0_OPCODE_X1 = 105,
+  V4SUB_RRR_0_OPCODE_X0 = 159,
+  V4SUB_RRR_0_OPCODE_X1 = 106,
+  WH64_UNARY_OPCODE_X1 = 38,
+  XORI_IMM8_OPCODE_X0 = 20,
+  XORI_IMM8_OPCODE_X1 = 45,
+  XOR_RRR_0_OPCODE_X0 = 160,
+  XOR_RRR_0_OPCODE_X1 = 107,
+  XOR_RRR_5_OPCODE_Y0 = 3,
+  XOR_RRR_5_OPCODE_Y1 = 3
+};
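+
+/* Reading the names above: e.g. ADD_RRR_0_OPCODE_X1 is the RRR opcode
+   extension that selects "add" within RRR-format-0 bundles in pipe X1,
+   while RRR_0_OPCODE_X1 is the top-level X1 opcode that selects that
+   format in the first place. */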
+
+static __inline unsigned int
+get_BFEnd_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_BFOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 24)) & 0xf);
+}
+
+static __inline unsigned int
+get_BFStart_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x3f);
+}
+
+static __inline unsigned int
+get_BrOff_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x0000003f) |
+         (((unsigned int)(n >> 37)) & 0x0001ffc0);
+}
+
+static __inline unsigned int
+get_BrType_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 54)) & 0x1f);
+}
+
+static __inline unsigned int
+get_Dest_Imm8_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x0000003f) |
+         (((unsigned int)(n >> 43)) & 0x000000c0);
+}
+
+static __inline unsigned int
+get_Dest_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 0)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Dest_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x3f);
+}
+
+static __inline unsigned int
+get_Imm16_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xffff);
+}
+
+static __inline unsigned int
+get_Imm16_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xffff);
+}
+
+static __inline unsigned int
+get_Imm8OpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 20)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8OpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 51)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0xff);
+}
+
+static __inline unsigned int
+get_Imm8_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0xff);
+}
+
+static __inline unsigned int
+get_JumpOff_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x7ffffff);
+}
+
+static __inline unsigned int
+get_JumpOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 58)) & 0x1);
+}
+
+static __inline unsigned int
+get_MF_Imm14_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x3fff);
+}
+
+static __inline unsigned int
+get_MT_Imm14_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 31)) & 0x0000003f) |
+         (((unsigned int)(n >> 37)) & 0x00003fc0);
+}
+
+static __inline unsigned int
+get_Mode(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 62)) & 0x3);
+}
+
+static __inline unsigned int
+get_Opcode_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 28)) & 0x7);
+}
+
+static __inline unsigned int
+get_Opcode_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 59)) & 0x7);
+}
+
+static __inline unsigned int
+get_Opcode_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 27)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 58)) & 0xf);
+}
+
+static __inline unsigned int
+get_Opcode_Y2(tilegx_bundle_bits n)
+{
+  return (((n >> 26)) & 0x00000001) |
+         (((unsigned int)(n >> 56)) & 0x00000002);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x3);
+}
+
+static __inline unsigned int
+get_RRROpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x3);
+}
+
+static __inline unsigned int
+get_ShAmt_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_ShAmt_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_ShAmt_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_ShAmt_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x3ff);
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 18)) & 0x3);
+}
+
+static __inline unsigned int
+get_ShiftOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 49)) & 0x3);
+}
+
+static __inline unsigned int
+get_SrcA_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 6)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 6)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 37)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcA_Y2(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 20)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcBDest_Y2(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 51)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_SrcB_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_UnaryOpcodeExtension_X0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_UnaryOpcodeExtension_X1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y0(tilegx_bundle_bits num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((n >> 12)) & 0x3f);
+}
+
+static __inline unsigned int
+get_UnaryOpcodeExtension_Y1(tilegx_bundle_bits n)
+{
+  return (((unsigned int)(n >> 43)) & 0x3f);
+}
+
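+/* Sign-extend the low num_bits bits of n to a full int; for example,
+   sign_extend(0x30, 6) == -16 and sign_extend(0x0f, 6) == 15.  The left
+   shift goes through unsigned int to avoid signed-overflow undefined
+   behavior; the right shift assumes arithmetic shift of signed ints. */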
+static __inline int
+sign_extend(int n, int num_bits)
+{
+  int shift = (int)(sizeof(int) * 8 - num_bits);
+  return (int)((unsigned int)n << shift) >> shift;
+}
+
+static __inline tilegx_bundle_bits
+create_BFEnd_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_BFOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 24);
+}
+
+static __inline tilegx_bundle_bits
+create_BFStart_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 18);
+}
+
+static __inline tilegx_bundle_bits
+create_BrOff_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) |
+         (((tilegx_bundle_bits)(n & 0x0001ffc0)) << 37);
+}
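+
+/* Illustrative check (not upstream code): each create_* routine is the
+   exact inverse of its get_* accessor over the field width; this one
+   returns nonzero for every off. */
+static __inline int
+example_broff_roundtrip(int off)
+{
+  return get_BrOff_X1(create_BrOff_X1(off)) == ((unsigned int)off & 0x1ffff);
+}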
+
+static __inline tilegx_bundle_bits
+create_BrType_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x1f)) << 54);
+}
+
+static __inline tilegx_bundle_bits
+create_Dest_Imm8_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) |
+         (((tilegx_bundle_bits)(n & 0x000000c0)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_Dest_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 0);
+}
+
+static __inline tilegx_bundle_bits
+create_Dest_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 31);
+}
+
+static __inline tilegx_bundle_bits
+create_Dest_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 0);
+}
+
+static __inline tilegx_bundle_bits
+create_Dest_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 31);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm16_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xffff) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm16_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0xffff)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 20);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm8OpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0xff)) << 51);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm8_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm8_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0xff)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm8_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xff) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_Imm8_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0xff)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_JumpOff_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x7ffffff)) << 31);
+}
+
+static __inline tilegx_bundle_bits
+create_JumpOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x1)) << 58);
+}
+
+static __inline tilegx_bundle_bits
+create_MF_Imm14_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3fff)) << 37);
+}
+
+static __inline tilegx_bundle_bits
+create_MT_Imm14_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) |
+         (((tilegx_bundle_bits)(n & 0x00003fc0)) << 37);
+}
+
+static __inline tilegx_bundle_bits
+create_Mode(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3)) << 62);
+}
+
+static __inline tilegx_bundle_bits
+create_Opcode_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x7) << 28);
+}
+
+static __inline tilegx_bundle_bits
+create_Opcode_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x7)) << 59);
+}
+
+static __inline tilegx_bundle_bits
+create_Opcode_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0xf) << 27);
+}
+
+static __inline tilegx_bundle_bits
+create_Opcode_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0xf)) << 58);
+}
+
+static __inline tilegx_bundle_bits
+create_Opcode_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x00000001) << 26) |
+         (((tilegx_bundle_bits)(n & 0x00000002)) << 56);
+}
+
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 18);
+}
+
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3ff)) << 49);
+}
+
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 18);
+}
+
+static __inline tilegx_bundle_bits
+create_RRROpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3)) << 49);
+}
+
+static __inline tilegx_bundle_bits
+create_ShAmt_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_ShAmt_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_ShAmt_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_ShAmt_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3ff) << 18);
+}
+
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3ff)) << 49);
+}
+
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3) << 18);
+}
+
+static __inline tilegx_bundle_bits
+create_ShiftOpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3)) << 49);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcA_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 6);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcA_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 37);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcA_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 6);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcA_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 37);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcA_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 20);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcBDest_Y2(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 51);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcB_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcB_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcB_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_SrcB_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_X1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 43);
+}
+
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y0(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return ((n & 0x3f) << 12);
+}
+
+static __inline tilegx_bundle_bits
+create_UnaryOpcodeExtension_Y1(int num)
+{
+  const unsigned int n = (unsigned int)num;
+  return (((tilegx_bundle_bits)(n & 0x3f)) << 43);
+}
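+
+/* Illustrative sketch (not upstream code): the X1 half of a bundle
+   holding "add d, a, b".  A complete bundle would still need its X0
+   slot filled (e.g. with an fnop), and real encoders normally build
+   bundles from the tilegx_opcodes table below rather than by hand. */
+static __inline tilegx_bundle_bits
+example_encode_add_x1(int d, int a, int b)
+{
+  return create_Opcode_X1(RRR_0_OPCODE_X1) |
+         create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) |
+         create_Dest_X1(d) | create_SrcA_X1(a) | create_SrcB_X1(b);
+}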
+
+const struct tilegx_opcode tilegx_opcodes[336] =
+{
+ { "bpt", TILEGX_OPC_BPT, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffffffff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a44ae00000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "info", TILEGX_OPC_INFO, 0xf, 1, TREG_ZERO, 1,
+    { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00fffULL,
+      0xfff807ff80000000ULL,
+      0x0000000078000fffULL,
+      0x3c0007ff80000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040300fffULL,
+      0x181807ff80000000ULL,
+      0x0000000010000fffULL,
+      0x0c0007ff80000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "infol", TILEGX_OPC_INFOL, 0x3, 1, TREG_ZERO, 1,
+    { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000fffULL,
+      0xf80007ff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000070000fffULL,
+      0x380007ff80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld4s_tls", TILEGX_OPC_LD4S_TLS, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1858000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld_tls", TILEGX_OPC_LD_TLS, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18a0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "move", TILEGX_OPC_MOVE, 0xf, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 6, 7 }, { 10, 11 }, { 12, 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0x00000000780ff000ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      0x000000005107f000ULL,
+      0x283bf80000000000ULL,
+      0x00000000500bf000ULL,
+      0x2c05f80000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "movei", TILEGX_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1,
+    { { 8, 0 }, { 6, 1 }, { 10, 2 }, { 12, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00fc0ULL,
+      0xfff807e000000000ULL,
+      0x0000000078000fc0ULL,
+      0x3c0007e000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040100fc0ULL,
+      0x180807e000000000ULL,
+      0x0000000000000fc0ULL,
+      0x040007e000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "moveli", TILEGX_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1,
+    { { 8, 4 }, { 6, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000fc0ULL,
+      0xf80007e000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000010000fc0ULL,
+      0x000007e000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch", TILEGX_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a801f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x41f8000004000000ULL
+    }
+#endif
+  },
+  { "prefetch_add_l1", TILEGX_OPC_PREFETCH_ADD_L1, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1840001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l1_fault", TILEGX_OPC_PREFETCH_ADD_L1_FAULT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1838001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l2", TILEGX_OPC_PREFETCH_ADD_L2, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1850001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l2_fault", TILEGX_OPC_PREFETCH_ADD_L2_FAULT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1848001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l3", TILEGX_OPC_PREFETCH_ADD_L3, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1860001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_add_l3_fault", TILEGX_OPC_PREFETCH_ADD_L3_FAULT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8001f80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1858001f80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "prefetch_l1", TILEGX_OPC_PREFETCH_L1, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a801f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x41f8000004000000ULL
+    }
+#endif
+  },
+  { "prefetch_l1_fault", TILEGX_OPC_PREFETCH_L1_FAULT, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a781f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x41f8000000000000ULL
+    }
+#endif
+  },
+  { "prefetch_l2", TILEGX_OPC_PREFETCH_L2, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a901f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x43f8000004000000ULL
+    }
+#endif
+  },
+  { "prefetch_l2_fault", TILEGX_OPC_PREFETCH_L2_FAULT, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a881f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x43f8000000000000ULL
+    }
+#endif
+  },
+  { "prefetch_l3", TILEGX_OPC_PREFETCH_L3, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286aa01f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x83f8000000000000ULL
+    }
+#endif
+  },
+  { "prefetch_l3_fault", TILEGX_OPC_PREFETCH_L3_FAULT, 0x12, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff81f80000000ULL,
+      0ULL,
+      0ULL,
+      0xc3f8000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a981f80000000ULL,
+      -1ULL,
+      -1ULL,
+      0x81f8000004000000ULL
+    }
+#endif
+  },
+  { "raise", TILEGX_OPC_RAISE, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffffffff80000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a44ae80000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "add", TILEGX_OPC_ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000500c0000ULL,
+      0x2806000000000000ULL,
+      0x0000000028040000ULL,
+      0x1802000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addi", TILEGX_OPC_ADDI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040100000ULL,
+      0x1808000000000000ULL,
+      0ULL,
+      0x0400000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addli", TILEGX_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000010000000ULL,
+      0ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addx", TILEGX_OPC_ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050080000ULL,
+      0x2804000000000000ULL,
+      0x0000000028000000ULL,
+      0x1800000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addxi", TILEGX_OPC_ADDXI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040200000ULL,
+      0x1810000000000000ULL,
+      0x0000000008000000ULL,
+      0x0800000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addxli", TILEGX_OPC_ADDXLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000020000000ULL,
+      0x0800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "addxsc", TILEGX_OPC_ADDXSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050040000ULL,
+      0x2802000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "and", TILEGX_OPC_AND, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050100000ULL,
+      0x2808000000000000ULL,
+      0x0000000050000000ULL,
+      0x2c00000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "andi", TILEGX_OPC_ANDI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040300000ULL,
+      0x1818000000000000ULL,
+      0x0000000010000000ULL,
+      0x0c00000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "beqz", TILEGX_OPC_BEQZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1440000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "beqzt", TILEGX_OPC_BEQZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1400000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bfexts", TILEGX_OPC_BFEXTS, 0x1, 4, TREG_ZERO, 1,
+    { { 8, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000034000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bfextu", TILEGX_OPC_BFEXTU, 0x1, 4, TREG_ZERO, 1,
+    { { 8, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000035000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bfins", TILEGX_OPC_BFINS, 0x1, 4, TREG_ZERO, 1,
+    { { 23, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000036000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgez", TILEGX_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x14c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgezt", TILEGX_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1480000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgtz", TILEGX_OPC_BGTZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1540000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bgtzt", TILEGX_OPC_BGTZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1500000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbc", TILEGX_OPC_BLBC, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x15c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbct", TILEGX_OPC_BLBCT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1580000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbs", TILEGX_OPC_BLBS, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1640000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blbst", TILEGX_OPC_BLBST, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1600000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blez", TILEGX_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x16c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "blezt", TILEGX_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1680000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bltz", TILEGX_OPC_BLTZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1740000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bltzt", TILEGX_OPC_BLTZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1700000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bnez", TILEGX_OPC_BNEZ, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x17c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "bnezt", TILEGX_OPC_BNEZT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xffc0000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1780000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "clz", TILEGX_OPC_CLZ, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051481000ULL,
+      -1ULL,
+      0x00000000300c1000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmoveqz", TILEGX_OPC_CMOVEQZ, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050140000ULL,
+      -1ULL,
+      0x0000000048000000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmovnez", TILEGX_OPC_CMOVNEZ, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050180000ULL,
+      -1ULL,
+      0x0000000048040000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpeq", TILEGX_OPC_CMPEQ, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000501c0000ULL,
+      0x280a000000000000ULL,
+      0x0000000040000000ULL,
+      0x2404000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpeqi", TILEGX_OPC_CMPEQI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040400000ULL,
+      0x1820000000000000ULL,
+      0x0000000018000000ULL,
+      0x1000000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpexch", TILEGX_OPC_CMPEXCH, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x280e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpexch4", TILEGX_OPC_CMPEXCH4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x280c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmples", TILEGX_OPC_CMPLES, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050200000ULL,
+      0x2810000000000000ULL,
+      0x0000000038000000ULL,
+      0x2000000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpleu", TILEGX_OPC_CMPLEU, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050240000ULL,
+      0x2812000000000000ULL,
+      0x0000000038040000ULL,
+      0x2002000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmplts", TILEGX_OPC_CMPLTS, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050280000ULL,
+      0x2814000000000000ULL,
+      0x0000000038080000ULL,
+      0x2004000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpltsi", TILEGX_OPC_CMPLTSI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0x0000000078000000ULL,
+      0x3c00000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000040500000ULL,
+      0x1828000000000000ULL,
+      0x0000000020000000ULL,
+      0x1400000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpltu", TILEGX_OPC_CMPLTU, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000502c0000ULL,
+      0x2816000000000000ULL,
+      0x00000000380c0000ULL,
+      0x2006000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpltui", TILEGX_OPC_CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040600000ULL,
+      0x1830000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmpne", TILEGX_OPC_CMPNE, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050300000ULL,
+      0x2818000000000000ULL,
+      0x0000000040040000ULL,
+      0x2406000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmul", TILEGX_OPC_CMUL, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000504c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmula", TILEGX_OPC_CMULA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050380000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulaf", TILEGX_OPC_CMULAF, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050340000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulf", TILEGX_OPC_CMULF, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050400000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulfr", TILEGX_OPC_CMULFR, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000503c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulh", TILEGX_OPC_CMULH, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050480000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "cmulhr", TILEGX_OPC_CMULHR, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050440000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "crc32_32", TILEGX_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050500000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "crc32_8", TILEGX_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050540000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ctz", TILEGX_OPC_CTZ, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051482000ULL,
+      -1ULL,
+      0x00000000300c2000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign", TILEGX_OPC_DBLALIGN, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050640000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign2", TILEGX_OPC_DBLALIGN2, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050580000ULL,
+      0x281a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign4", TILEGX_OPC_DBLALIGN4, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000505c0000ULL,
+      0x281c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dblalign6", TILEGX_OPC_DBLALIGN6, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050600000ULL,
+      0x281e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "drain", TILEGX_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a080000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "dtlbpr", TILEGX_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a100000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "exch", TILEGX_OPC_EXCH, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2822000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "exch4", TILEGX_OPC_EXCH4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2820000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_add_flags", TILEGX_OPC_FDOUBLE_ADD_FLAGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000506c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_addsub", TILEGX_OPC_FDOUBLE_ADDSUB, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050680000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_mul_flags", TILEGX_OPC_FDOUBLE_MUL_FLAGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050700000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_pack1", TILEGX_OPC_FDOUBLE_PACK1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050740000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_pack2", TILEGX_OPC_FDOUBLE_PACK2, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050780000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_sub_flags", TILEGX_OPC_FDOUBLE_SUB_FLAGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000507c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_unpack_max", TILEGX_OPC_FDOUBLE_UNPACK_MAX, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050800000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fdouble_unpack_min", TILEGX_OPC_FDOUBLE_UNPACK_MIN, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050840000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchadd", TILEGX_OPC_FETCHADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x282a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchadd4", TILEGX_OPC_FETCHADD4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2824000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchaddgez", TILEGX_OPC_FETCHADDGEZ, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2828000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchaddgez4", TILEGX_OPC_FETCHADDGEZ4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2826000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchand", TILEGX_OPC_FETCHAND, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x282e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchand4", TILEGX_OPC_FETCHAND4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x282c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchor", TILEGX_OPC_FETCHOR, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2832000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fetchor4", TILEGX_OPC_FETCHOR4, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2830000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "finv", TILEGX_OPC_FINV, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a180000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "flush", TILEGX_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a280000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "flushwb", TILEGX_OPC_FLUSHWB, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a200000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fnop", TILEGX_OPC_FNOP, 0xf, 0, TREG_ZERO, 1,
+    { {  }, {  }, {  }, {  }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0x00000000780ff000ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051483000ULL,
+      0x286a300000000000ULL,
+      0x00000000300c3000ULL,
+      0x1c06400000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_add1", TILEGX_OPC_FSINGLE_ADD1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_addsub2", TILEGX_OPC_FSINGLE_ADDSUB2, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000508c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_mul1", TILEGX_OPC_FSINGLE_MUL1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050900000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_mul2", TILEGX_OPC_FSINGLE_MUL2, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050940000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_pack1", TILEGX_OPC_FSINGLE_PACK1, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051484000ULL,
+      -1ULL,
+      0x00000000300c4000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_pack2", TILEGX_OPC_FSINGLE_PACK2, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050980000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "fsingle_sub1", TILEGX_OPC_FSINGLE_SUB1, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000509c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "icoh", TILEGX_OPC_ICOH, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a380000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ill", TILEGX_OPC_ILL, 0xa, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, {  }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a400000000000ULL,
+      -1ULL,
+      0x1c06480000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "inv", TILEGX_OPC_INV, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a480000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "iret", TILEGX_OPC_IRET, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a500000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "j", TILEGX_OPC_J, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfc00000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2400000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jal", TILEGX_OPC_JAL, 0x2, 1, TREG_LR, 1,
+    { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfc00000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2000000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jalr", TILEGX_OPC_JALR, 0xa, 1, TREG_LR, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a600000000000ULL,
+      -1ULL,
+      0x1c06580000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jalrp", TILEGX_OPC_JALRP, 0xa, 1, TREG_LR, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a580000000000ULL,
+      -1ULL,
+      0x1c06500000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jr", TILEGX_OPC_JR, 0xa, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a700000000000ULL,
+      -1ULL,
+      0x1c06680000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "jrp", TILEGX_OPC_JRP, 0xa, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286a680000000000ULL,
+      -1ULL,
+      0x1c06600000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld", TILEGX_OPC_LD, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286ae80000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8200000004000000ULL
+    }
+#endif
+  },
+  { "ld1s", TILEGX_OPC_LD1S, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a780000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4000000000000000ULL
+    }
+#endif
+  },
+  { "ld1s_add", TILEGX_OPC_LD1S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1838000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld1u", TILEGX_OPC_LD1U, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a800000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4000000004000000ULL
+    }
+#endif
+  },
+  { "ld1u_add", TILEGX_OPC_LD1U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1840000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld2s", TILEGX_OPC_LD2S, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a880000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4200000000000000ULL
+    }
+#endif
+  },
+  { "ld2s_add", TILEGX_OPC_LD2S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1848000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld2u", TILEGX_OPC_LD2U, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a900000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x4200000004000000ULL
+    }
+#endif
+  },
+  { "ld2u_add", TILEGX_OPC_LD2U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1850000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld4s", TILEGX_OPC_LD4S, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286a980000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8000000004000000ULL
+    }
+#endif
+  },
+  { "ld4s_add", TILEGX_OPC_LD4S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1858000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld4u", TILEGX_OPC_LD4U, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x286aa00000000000ULL,
+      -1ULL,
+      -1ULL,
+      0x8200000000000000ULL
+    }
+#endif
+  },
+  { "ld4u_add", TILEGX_OPC_LD4U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1860000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ld_add", TILEGX_OPC_LD_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18a0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldna", TILEGX_OPC_LDNA, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286aa80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldna_add", TILEGX_OPC_LDNA_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18a8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt", TILEGX_OPC_LDNT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ae00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1s", TILEGX_OPC_LDNT1S, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ab00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1s_add", TILEGX_OPC_LDNT1S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1868000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1u", TILEGX_OPC_LDNT1U, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ab80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt1u_add", TILEGX_OPC_LDNT1U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1870000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2s", TILEGX_OPC_LDNT2S, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ac00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2s_add", TILEGX_OPC_LDNT2S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1878000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2u", TILEGX_OPC_LDNT2U, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ac80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt2u_add", TILEGX_OPC_LDNT2U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1880000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4s", TILEGX_OPC_LDNT4S, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ad00000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4s_add", TILEGX_OPC_LDNT4S_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1888000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4u", TILEGX_OPC_LDNT4U, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286ad80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt4u_add", TILEGX_OPC_LDNT4U_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1890000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ldnt_add", TILEGX_OPC_LDNT_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1898000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "lnk", TILEGX_OPC_LNK, 0xa, 1, TREG_ZERO, 1,
+    { { 0, }, { 6 }, { 0, }, { 12 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286af00000000000ULL,
+      -1ULL,
+      0x1c06700000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mf", TILEGX_OPC_MF, 0x2, 0, TREG_ZERO, 1,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286af80000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mfspr", TILEGX_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 6, 27 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18b0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mm", TILEGX_OPC_MM, 0x1, 4, TREG_ZERO, 1,
+    { { 23, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007f000000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000037000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mnz", TILEGX_OPC_MNZ, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050a00000ULL,
+      0x2834000000000000ULL,
+      0x0000000048080000ULL,
+      0x2804000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mtspr", TILEGX_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 28, 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18b8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_hs", TILEGX_OPC_MUL_HS_HS, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050d40000ULL,
+      -1ULL,
+      0x0000000068000000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_hu", TILEGX_OPC_MUL_HS_HU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050d80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_ls", TILEGX_OPC_MUL_HS_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050dc0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hs_lu", TILEGX_OPC_MUL_HS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050e00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hu_hu", TILEGX_OPC_MUL_HU_HU, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050e40000ULL,
+      -1ULL,
+      0x0000000068040000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hu_ls", TILEGX_OPC_MUL_HU_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050e80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_hu_lu", TILEGX_OPC_MUL_HU_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050ec0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_ls_ls", TILEGX_OPC_MUL_LS_LS, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050f00000ULL,
+      -1ULL,
+      0x0000000068080000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_ls_lu", TILEGX_OPC_MUL_LS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050f40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mul_lu_lu", TILEGX_OPC_MUL_LU_LU, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050f80000ULL,
+      -1ULL,
+      0x00000000680c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_hs", TILEGX_OPC_MULA_HS_HS, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050a80000ULL,
+      -1ULL,
+      0x0000000070000000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_hu", TILEGX_OPC_MULA_HS_HU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050ac0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_ls", TILEGX_OPC_MULA_HS_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050b00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hs_lu", TILEGX_OPC_MULA_HS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050b40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hu_hu", TILEGX_OPC_MULA_HU_HU, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050b80000ULL,
+      -1ULL,
+      0x0000000070040000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hu_ls", TILEGX_OPC_MULA_HU_LS, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050bc0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_hu_lu", TILEGX_OPC_MULA_HU_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050c00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_ls_ls", TILEGX_OPC_MULA_LS_LS, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050c40000ULL,
+      -1ULL,
+      0x0000000070080000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_ls_lu", TILEGX_OPC_MULA_LS_LU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050c80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mula_lu_lu", TILEGX_OPC_MULA_LU_LU, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050cc0000ULL,
+      -1ULL,
+      0x00000000700c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mulax", TILEGX_OPC_MULAX, 0x5, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050a40000ULL,
+      -1ULL,
+      0x0000000040080000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mulx", TILEGX_OPC_MULX, 0x5, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0x00000000780c0000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000050d00000ULL,
+      -1ULL,
+      0x00000000400c0000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "mz", TILEGX_OPC_MZ, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000050fc0000ULL,
+      0x2836000000000000ULL,
+      0x00000000480c0000ULL,
+      0x2806000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "nap", TILEGX_OPC_NAP, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "nop", TILEGX_OPC_NOP, 0xf, 0, TREG_ZERO, 1,
+    { {  }, {  }, {  }, {  }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0xfffff80000000000ULL,
+      0x00000000780ff000ULL,
+      0x3c07f80000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051485000ULL,
+      0x286b080000000000ULL,
+      0x00000000300c5000ULL,
+      0x1c06780000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "nor", TILEGX_OPC_NOR, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051000000ULL,
+      0x2838000000000000ULL,
+      0x0000000050040000ULL,
+      0x2c02000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "or", TILEGX_OPC_OR, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051040000ULL,
+      0x283a000000000000ULL,
+      0x0000000050080000ULL,
+      0x2c04000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "ori", TILEGX_OPC_ORI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040700000ULL,
+      0x18c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "pcnt", TILEGX_OPC_PCNT, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051486000ULL,
+      -1ULL,
+      0x00000000300c6000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "revbits", TILEGX_OPC_REVBITS, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051487000ULL,
+      -1ULL,
+      0x00000000300c7000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "revbytes", TILEGX_OPC_REVBYTES, 0x5, 2, TREG_ZERO, 1,
+    { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051488000ULL,
+      -1ULL,
+      0x00000000300c8000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "rotl", TILEGX_OPC_ROTL, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051080000ULL,
+      0x283c000000000000ULL,
+      0x0000000058000000ULL,
+      0x3000000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "rotli", TILEGX_OPC_ROTLI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060040000ULL,
+      0x3002000000000000ULL,
+      0x0000000078000000ULL,
+      0x3800000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl", TILEGX_OPC_SHL, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051280000ULL,
+      0x284c000000000000ULL,
+      0x0000000058040000ULL,
+      0x3002000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl16insli", TILEGX_OPC_SHL16INSLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc000000070000000ULL,
+      0xf800000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000070000000ULL,
+      0x3800000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl1add", TILEGX_OPC_SHL1ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051100000ULL,
+      0x2840000000000000ULL,
+      0x0000000030000000ULL,
+      0x1c00000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl1addx", TILEGX_OPC_SHL1ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000510c0000ULL,
+      0x283e000000000000ULL,
+      0x0000000060040000ULL,
+      0x3402000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl2add", TILEGX_OPC_SHL2ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051180000ULL,
+      0x2844000000000000ULL,
+      0x0000000030040000ULL,
+      0x1c02000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl2addx", TILEGX_OPC_SHL2ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051140000ULL,
+      0x2842000000000000ULL,
+      0x0000000060080000ULL,
+      0x3404000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl3add", TILEGX_OPC_SHL3ADD, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051200000ULL,
+      0x2848000000000000ULL,
+      0x0000000030080000ULL,
+      0x1c04000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shl3addx", TILEGX_OPC_SHL3ADDX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000511c0000ULL,
+      0x2846000000000000ULL,
+      0x00000000600c0000ULL,
+      0x3406000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shli", TILEGX_OPC_SHLI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060080000ULL,
+      0x3004000000000000ULL,
+      0x0000000078040000ULL,
+      0x3802000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shlx", TILEGX_OPC_SHLX, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051240000ULL,
+      0x284a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shlxi", TILEGX_OPC_SHLXI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000600c0000ULL,
+      0x3006000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrs", TILEGX_OPC_SHRS, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x00000000512c0000ULL,
+      0x284e000000000000ULL,
+      0x0000000058080000ULL,
+      0x3004000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrsi", TILEGX_OPC_SHRSI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060100000ULL,
+      0x3008000000000000ULL,
+      0x0000000078080000ULL,
+      0x3804000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shru", TILEGX_OPC_SHRU, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051340000ULL,
+      0x2852000000000000ULL,
+      0x00000000580c0000ULL,
+      0x3006000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrui", TILEGX_OPC_SHRUI, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000060140000ULL,
+      0x300a000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3806000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shrux", TILEGX_OPC_SHRUX, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051300000ULL,
+      0x2850000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shruxi", TILEGX_OPC_SHRUXI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060180000ULL,
+      0x300c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "shufflebytes", TILEGX_OPC_SHUFFLEBYTES, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051380000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st", TILEGX_OPC_ST, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2862000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc200000004000000ULL
+    }
+#endif
+  },
+  { "st1", TILEGX_OPC_ST1, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2854000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc000000000000000ULL
+    }
+#endif
+  },
+  { "st1_add", TILEGX_OPC_ST1_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18c8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st2", TILEGX_OPC_ST2, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2856000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc000000004000000ULL
+    }
+#endif
+  },
+  { "st2_add", TILEGX_OPC_ST2_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18d0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st4", TILEGX_OPC_ST4, 0x12, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0xc200000004000000ULL
+    },
+    {
+      -1ULL,
+      0x2858000000000000ULL,
+      -1ULL,
+      -1ULL,
+      0xc200000000000000ULL
+    }
+#endif
+  },
+  { "st4_add", TILEGX_OPC_ST4_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18d8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "st_add", TILEGX_OPC_ST_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x1900000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt", TILEGX_OPC_STNT, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x2860000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt1", TILEGX_OPC_STNT1, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x285a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt1_add", TILEGX_OPC_STNT1_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18e0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt2", TILEGX_OPC_STNT2, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x285c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt2_add", TILEGX_OPC_STNT2_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18e8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt4", TILEGX_OPC_STNT4, 0x2, 2, TREG_ZERO, 1,
+    { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x285e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt4_add", TILEGX_OPC_STNT4_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18f0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "stnt_add", TILEGX_OPC_STNT_ADD, 0x2, 3, TREG_ZERO, 1,
+    { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x18f8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "sub", TILEGX_OPC_SUB, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051440000ULL,
+      0x2868000000000000ULL,
+      0x00000000280c0000ULL,
+      0x1806000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "subx", TILEGX_OPC_SUBX, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000051400000ULL,
+      0x2866000000000000ULL,
+      0x0000000028080000ULL,
+      0x1804000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "subxsc", TILEGX_OPC_SUBXSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000513c0000ULL,
+      0x2864000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint0", TILEGX_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b100000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint1", TILEGX_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b180000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint2", TILEGX_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b200000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "swint3", TILEGX_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0,
+    { { 0, }, {  }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b280000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb0", TILEGX_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051489000ULL,
+      -1ULL,
+      0x00000000300c9000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb1", TILEGX_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x000000005148a000ULL,
+      -1ULL,
+      0x00000000300ca000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb2", TILEGX_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x000000005148b000ULL,
+      -1ULL,
+      0x00000000300cb000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "tblidxb3", TILEGX_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1,
+    { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffff000ULL,
+      0ULL,
+      0x00000000780ff000ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x000000005148c000ULL,
+      -1ULL,
+      0x00000000300cc000ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1add", TILEGX_OPC_V1ADD, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051500000ULL,
+      0x286e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1addi", TILEGX_OPC_V1ADDI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040800000ULL,
+      0x1908000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1adduc", TILEGX_OPC_V1ADDUC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000514c0000ULL,
+      0x286c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1adiffu", TILEGX_OPC_V1ADIFFU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051540000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1avgu", TILEGX_OPC_V1AVGU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051580000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpeq", TILEGX_OPC_V1CMPEQ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000515c0000ULL,
+      0x2870000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpeqi", TILEGX_OPC_V1CMPEQI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040900000ULL,
+      0x1910000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmples", TILEGX_OPC_V1CMPLES, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051600000ULL,
+      0x2872000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpleu", TILEGX_OPC_V1CMPLEU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051640000ULL,
+      0x2874000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmplts", TILEGX_OPC_V1CMPLTS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051680000ULL,
+      0x2876000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpltsi", TILEGX_OPC_V1CMPLTSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040a00000ULL,
+      0x1918000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpltu", TILEGX_OPC_V1CMPLTU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000516c0000ULL,
+      0x2878000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpltui", TILEGX_OPC_V1CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040b00000ULL,
+      0x1920000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1cmpne", TILEGX_OPC_V1CMPNE, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051700000ULL,
+      0x287a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpu", TILEGX_OPC_V1DDOTPU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpua", TILEGX_OPC_V1DDOTPUA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052840000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpus", TILEGX_OPC_V1DDOTPUS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051780000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1ddotpusa", TILEGX_OPC_V1DDOTPUSA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051740000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotp", TILEGX_OPC_V1DOTP, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051880000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpa", TILEGX_OPC_V1DOTPA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000517c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpu", TILEGX_OPC_V1DOTPU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052900000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpua", TILEGX_OPC_V1DOTPUA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000528c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpus", TILEGX_OPC_V1DOTPUS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051840000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1dotpusa", TILEGX_OPC_V1DOTPUSA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051800000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1int_h", TILEGX_OPC_V1INT_H, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000518c0000ULL,
+      0x287c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1int_l", TILEGX_OPC_V1INT_L, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051900000ULL,
+      0x287e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1maxu", TILEGX_OPC_V1MAXU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051940000ULL,
+      0x2880000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1maxui", TILEGX_OPC_V1MAXUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040c00000ULL,
+      0x1928000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1minu", TILEGX_OPC_V1MINU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051980000ULL,
+      0x2882000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1minui", TILEGX_OPC_V1MINUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040d00000ULL,
+      0x1930000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mnz", TILEGX_OPC_V1MNZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000519c0000ULL,
+      0x2884000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1multu", TILEGX_OPC_V1MULTU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051a00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mulu", TILEGX_OPC_V1MULU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051a80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mulus", TILEGX_OPC_V1MULUS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051a40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1mz", TILEGX_OPC_V1MZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051ac0000ULL,
+      0x2886000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1sadau", TILEGX_OPC_V1SADAU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051b00000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1sadu", TILEGX_OPC_V1SADU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051b40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shl", TILEGX_OPC_V1SHL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051b80000ULL,
+      0x2888000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shli", TILEGX_OPC_V1SHLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000601c0000ULL,
+      0x300e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shrs", TILEGX_OPC_V1SHRS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051bc0000ULL,
+      0x288a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shrsi", TILEGX_OPC_V1SHRSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060200000ULL,
+      0x3010000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shru", TILEGX_OPC_V1SHRU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051c00000ULL,
+      0x288c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1shrui", TILEGX_OPC_V1SHRUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060240000ULL,
+      0x3012000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1sub", TILEGX_OPC_V1SUB, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051c80000ULL,
+      0x2890000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v1subuc", TILEGX_OPC_V1SUBUC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051c40000ULL,
+      0x288e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2add", TILEGX_OPC_V2ADD, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051d00000ULL,
+      0x2894000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2addi", TILEGX_OPC_V2ADDI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040e00000ULL,
+      0x1938000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2addsc", TILEGX_OPC_V2ADDSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051cc0000ULL,
+      0x2892000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2adiffs", TILEGX_OPC_V2ADIFFS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051d40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2avgs", TILEGX_OPC_V2AVGS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051d80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpeq", TILEGX_OPC_V2CMPEQ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051dc0000ULL,
+      0x2896000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpeqi", TILEGX_OPC_V2CMPEQI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000040f00000ULL,
+      0x1940000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmples", TILEGX_OPC_V2CMPLES, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051e00000ULL,
+      0x2898000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpleu", TILEGX_OPC_V2CMPLEU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051e40000ULL,
+      0x289a000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmplts", TILEGX_OPC_V2CMPLTS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051e80000ULL,
+      0x289c000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpltsi", TILEGX_OPC_V2CMPLTSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041000000ULL,
+      0x1948000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpltu", TILEGX_OPC_V2CMPLTU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051ec0000ULL,
+      0x289e000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpltui", TILEGX_OPC_V2CMPLTUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041100000ULL,
+      0x1950000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2cmpne", TILEGX_OPC_V2CMPNE, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051f00000ULL,
+      0x28a0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2dotp", TILEGX_OPC_V2DOTP, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051f80000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2dotpa", TILEGX_OPC_V2DOTPA, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051f40000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2int_h", TILEGX_OPC_V2INT_H, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000051fc0000ULL,
+      0x28a2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2int_l", TILEGX_OPC_V2INT_L, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052000000ULL,
+      0x28a4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2maxs", TILEGX_OPC_V2MAXS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052040000ULL,
+      0x28a6000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2maxsi", TILEGX_OPC_V2MAXSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041200000ULL,
+      0x1958000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mins", TILEGX_OPC_V2MINS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052080000ULL,
+      0x28a8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2minsi", TILEGX_OPC_V2MINSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041300000ULL,
+      0x1960000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mnz", TILEGX_OPC_V2MNZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000520c0000ULL,
+      0x28aa000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mulfsc", TILEGX_OPC_V2MULFSC, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052100000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2muls", TILEGX_OPC_V2MULS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052140000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mults", TILEGX_OPC_V2MULTS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052180000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2mz", TILEGX_OPC_V2MZ, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000521c0000ULL,
+      0x28ac000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2packh", TILEGX_OPC_V2PACKH, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052200000ULL,
+      0x28ae000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2packl", TILEGX_OPC_V2PACKL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052240000ULL,
+      0x28b0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2packuc", TILEGX_OPC_V2PACKUC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052280000ULL,
+      0x28b2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sadas", TILEGX_OPC_V2SADAS, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000522c0000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sadau", TILEGX_OPC_V2SADAU, 0x1, 3, TREG_ZERO, 1,
+    { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052300000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sads", TILEGX_OPC_V2SADS, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052340000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sadu", TILEGX_OPC_V2SADU, 0x1, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052380000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shl", TILEGX_OPC_V2SHL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052400000ULL,
+      0x28b6000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shli", TILEGX_OPC_V2SHLI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060280000ULL,
+      0x3014000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shlsc", TILEGX_OPC_V2SHLSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000523c0000ULL,
+      0x28b4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shrs", TILEGX_OPC_V2SHRS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052440000ULL,
+      0x28b8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shrsi", TILEGX_OPC_V2SHRSI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000602c0000ULL,
+      0x3016000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shru", TILEGX_OPC_V2SHRU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052480000ULL,
+      0x28ba000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2shrui", TILEGX_OPC_V2SHRUI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000060300000ULL,
+      0x3018000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2sub", TILEGX_OPC_V2SUB, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052500000ULL,
+      0x28be000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v2subsc", TILEGX_OPC_V2SUBSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000524c0000ULL,
+      0x28bc000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4add", TILEGX_OPC_V4ADD, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052580000ULL,
+      0x28c2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4addsc", TILEGX_OPC_V4ADDSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052540000ULL,
+      0x28c0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4int_h", TILEGX_OPC_V4INT_H, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000525c0000ULL,
+      0x28c4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4int_l", TILEGX_OPC_V4INT_L, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052600000ULL,
+      0x28c6000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4packsc", TILEGX_OPC_V4PACKSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052640000ULL,
+      0x28c8000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shl", TILEGX_OPC_V4SHL, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000526c0000ULL,
+      0x28cc000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shlsc", TILEGX_OPC_V4SHLSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052680000ULL,
+      0x28ca000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shrs", TILEGX_OPC_V4SHRS, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052700000ULL,
+      0x28ce000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4shru", TILEGX_OPC_V4SHRU, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052740000ULL,
+      0x28d0000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4sub", TILEGX_OPC_V4SUB, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x00000000527c0000ULL,
+      0x28d4000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "v4subsc", TILEGX_OPC_V4SUBSC, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000052780000ULL,
+      0x28d2000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "wh64", TILEGX_OPC_WH64, 0x2, 1, TREG_ZERO, 1,
+    { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0ULL,
+      0xfffff80000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      -1ULL,
+      0x286b300000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "xor", TILEGX_OPC_XOR, 0xf, 3, TREG_ZERO, 1,
+    { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ffc0000ULL,
+      0xfffe000000000000ULL,
+      0x00000000780c0000ULL,
+      0x3c06000000000000ULL,
+      0ULL
+    },
+    {
+      0x0000000052800000ULL,
+      0x28d6000000000000ULL,
+      0x00000000500c0000ULL,
+      0x2c06000000000000ULL,
+      -1ULL
+    }
+#endif
+  },
+  { "xori", TILEGX_OPC_XORI, 0x3, 3, TREG_ZERO, 1,
+    { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } },
+#ifndef DISASM_ONLY
+    {
+      0xc00000007ff00000ULL,
+      0xfff8000000000000ULL,
+      0ULL,
+      0ULL,
+      0ULL
+    },
+    {
+      0x0000000041400000ULL,
+      0x1968000000000000ULL,
+      -1ULL,
+      -1ULL,
+      -1ULL
+    }
+#endif
+  },
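+  /* End-of-table sentinel. */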
+  { NULL, TILEGX_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } },
+#ifndef DISASM_ONLY
+    { 0, }, { 0, }
+#endif
+  }
+};
+
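+/* The decode tables below form a finite-state machine.  BITFIELD(start,
+   size) packs a field descriptor into one short: the low 6 bits hold the
+   starting bit position within the bundle, and the bits above them hold
+   ((1 << size) - 1), i.e. a run of (size) one-bits.  CHILD(array_index)
+   tags a slot that chains to a nested decode table beginning at
+   array_index instead of naming a final opcode. */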
+#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6))
+#define CHILD(array_index) (TILEGX_OPC_NONE + (array_index))
+
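+/* Decode FSM for the X0 issue slot: the leading BITFIELD entry selects
+   which bundle bits to extract, and the extracted value indexes the
+   entries that follow, each of which is either a final TILEGX_OPC_* value
+   or a CHILD link into a nested table later in this array. */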
+static const unsigned short decode_X0_fsm[936] =
+{
+  BITFIELD(22, 9) /* index 0 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BFEXTS,
+  TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTU,
+  TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFINS,
+  TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_MM,
+  TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(528), CHILD(578),
+  CHILD(583), CHILD(588), CHILD(593), CHILD(598), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, CHILD(603), CHILD(620), CHILD(637), CHILD(654), CHILD(671),
+  CHILD(703), CHILD(797), CHILD(814), CHILD(831), CHILD(848), CHILD(865),
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, CHILD(889), TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906),
+  BITFIELD(6, 2) /* index 513 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518),
+  BITFIELD(8, 2) /* index 518 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523),
+  BITFIELD(10, 2) /* index 523 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI,
+  BITFIELD(20, 2) /* index 528 */,
+  TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548),
+  BITFIELD(6, 2) /* index 533 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538),
+  BITFIELD(8, 2) /* index 538 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543),
+  BITFIELD(10, 2) /* index 543 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(0, 2) /* index 548 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553),
+  BITFIELD(2, 2) /* index 553 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558),
+  BITFIELD(4, 2) /* index 558 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563),
+  BITFIELD(6, 2) /* index 563 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568),
+  BITFIELD(8, 2) /* index 568 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573),
+  BITFIELD(10, 2) /* index 573 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(20, 2) /* index 578 */,
+  TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, TILEGX_OPC_ORI,
+  BITFIELD(20, 2) /* index 583 */,
+  TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, TILEGX_OPC_V1CMPLTSI,
+  TILEGX_OPC_V1CMPLTUI,
+  BITFIELD(20, 2) /* index 588 */,
+  TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, TILEGX_OPC_V2ADDI,
+  TILEGX_OPC_V2CMPEQI,
+  BITFIELD(20, 2) /* index 593 */,
+  TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, TILEGX_OPC_V2MAXSI,
+  TILEGX_OPC_V2MINSI,
+  BITFIELD(20, 2) /* index 598 */,
+  TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(18, 4) /* index 603 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD,
+  TILEGX_OPC_AND, TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_CMPEQ,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  TILEGX_OPC_CMPNE, TILEGX_OPC_CMULAF, TILEGX_OPC_CMULA, TILEGX_OPC_CMULFR,
+  BITFIELD(18, 4) /* index 620 */,
+  TILEGX_OPC_CMULF, TILEGX_OPC_CMULHR, TILEGX_OPC_CMULH, TILEGX_OPC_CMUL,
+  TILEGX_OPC_CRC32_32, TILEGX_OPC_CRC32_8, TILEGX_OPC_DBLALIGN2,
+  TILEGX_OPC_DBLALIGN4, TILEGX_OPC_DBLALIGN6, TILEGX_OPC_DBLALIGN,
+  TILEGX_OPC_FDOUBLE_ADDSUB, TILEGX_OPC_FDOUBLE_ADD_FLAGS,
+  TILEGX_OPC_FDOUBLE_MUL_FLAGS, TILEGX_OPC_FDOUBLE_PACK1,
+  TILEGX_OPC_FDOUBLE_PACK2, TILEGX_OPC_FDOUBLE_SUB_FLAGS,
+  BITFIELD(18, 4) /* index 637 */,
+  TILEGX_OPC_FDOUBLE_UNPACK_MAX, TILEGX_OPC_FDOUBLE_UNPACK_MIN,
+  TILEGX_OPC_FSINGLE_ADD1, TILEGX_OPC_FSINGLE_ADDSUB2,
+  TILEGX_OPC_FSINGLE_MUL1, TILEGX_OPC_FSINGLE_MUL2, TILEGX_OPC_FSINGLE_PACK2,
+  TILEGX_OPC_FSINGLE_SUB1, TILEGX_OPC_MNZ, TILEGX_OPC_MULAX,
+  TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HS_HU, TILEGX_OPC_MULA_HS_LS,
+  TILEGX_OPC_MULA_HS_LU, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_HU_LS,
+  BITFIELD(18, 4) /* index 654 */,
+  TILEGX_OPC_MULA_HU_LU, TILEGX_OPC_MULA_LS_LS, TILEGX_OPC_MULA_LS_LU,
+  TILEGX_OPC_MULA_LU_LU, TILEGX_OPC_MULX, TILEGX_OPC_MUL_HS_HS,
+  TILEGX_OPC_MUL_HS_HU, TILEGX_OPC_MUL_HS_LS, TILEGX_OPC_MUL_HS_LU,
+  TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_HU_LS, TILEGX_OPC_MUL_HU_LU,
+  TILEGX_OPC_MUL_LS_LS, TILEGX_OPC_MUL_LS_LU, TILEGX_OPC_MUL_LU_LU,
+  TILEGX_OPC_MZ,
+  BITFIELD(18, 4) /* index 671 */,
+  TILEGX_OPC_NOR, CHILD(688), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL,
+  TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_SHUFFLEBYTES,
+  TILEGX_OPC_SUBXSC,
+  BITFIELD(12, 2) /* index 688 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(693),
+  BITFIELD(14, 2) /* index 693 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(698),
+  BITFIELD(16, 2) /* index 698 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(18, 4) /* index 703 */,
+  TILEGX_OPC_SUBX, TILEGX_OPC_SUB, CHILD(720), TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADIFFU, TILEGX_OPC_V1AVGU,
+  TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1DDOTPUSA, TILEGX_OPC_V1DDOTPUS, TILEGX_OPC_V1DOTPA,
+  BITFIELD(12, 4) /* index 720 */,
+  TILEGX_OPC_NONE, CHILD(737), CHILD(742), CHILD(747), CHILD(752), CHILD(757),
+  CHILD(762), CHILD(767), CHILD(772), CHILD(777), CHILD(782), CHILD(787),
+  CHILD(792), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 737 */,
+  TILEGX_OPC_CLZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 742 */,
+  TILEGX_OPC_CTZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 747 */,
+  TILEGX_OPC_FNOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 752 */,
+  TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 757 */,
+  TILEGX_OPC_NOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 762 */,
+  TILEGX_OPC_PCNT, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 767 */,
+  TILEGX_OPC_REVBITS, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 772 */,
+  TILEGX_OPC_REVBYTES, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 777 */,
+  TILEGX_OPC_TBLIDXB0, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 782 */,
+  TILEGX_OPC_TBLIDXB1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 787 */,
+  TILEGX_OPC_TBLIDXB2, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(16, 2) /* index 792 */,
+  TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(18, 4) /* index 797 */,
+  TILEGX_OPC_V1DOTPUSA, TILEGX_OPC_V1DOTPUS, TILEGX_OPC_V1DOTP,
+  TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1MAXU,
+  TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MULTU, TILEGX_OPC_V1MULUS,
+  TILEGX_OPC_V1MULU, TILEGX_OPC_V1MZ, TILEGX_OPC_V1SADAU, TILEGX_OPC_V1SADU,
+  TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS,
+  BITFIELD(18, 4) /* index 814 */,
+  TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC,
+  TILEGX_OPC_V2ADD, TILEGX_OPC_V2ADIFFS, TILEGX_OPC_V2AVGS,
+  TILEGX_OPC_V2CMPEQ, TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU,
+  TILEGX_OPC_V2CMPLTS, TILEGX_OPC_V2CMPLTU, TILEGX_OPC_V2CMPNE,
+  TILEGX_OPC_V2DOTPA, TILEGX_OPC_V2DOTP, TILEGX_OPC_V2INT_H,
+  BITFIELD(18, 4) /* index 831 */,
+  TILEGX_OPC_V2INT_L, TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ,
+  TILEGX_OPC_V2MULFSC, TILEGX_OPC_V2MULS, TILEGX_OPC_V2MULTS, TILEGX_OPC_V2MZ,
+  TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC,
+  TILEGX_OPC_V2SADAS, TILEGX_OPC_V2SADAU, TILEGX_OPC_V2SADS,
+  TILEGX_OPC_V2SADU, TILEGX_OPC_V2SHLSC,
+  BITFIELD(18, 4) /* index 848 */,
+  TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, TILEGX_OPC_V2SUBSC,
+  TILEGX_OPC_V2SUB, TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H,
+  TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC,
+  TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC,
+  TILEGX_OPC_V4SUB,
+  BITFIELD(18, 3) /* index 865 */,
+  CHILD(874), CHILD(877), CHILD(880), CHILD(883), CHILD(886), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 874 */,
+  TILEGX_OPC_XOR, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 877 */,
+  TILEGX_OPC_V1DDOTPUA, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 880 */,
+  TILEGX_OPC_V1DDOTPU, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 883 */,
+  TILEGX_OPC_V1DOTPUA, TILEGX_OPC_NONE,
+  BITFIELD(21, 1) /* index 886 */,
+  TILEGX_OPC_V1DOTPU, TILEGX_OPC_NONE,
+  BITFIELD(18, 4) /* index 889 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI,
+  TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI,
+  TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI,
+  TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE,
+  BITFIELD(0, 2) /* index 906 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(911),
+  BITFIELD(2, 2) /* index 911 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(916),
+  BITFIELD(4, 2) /* index 916 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(921),
+  BITFIELD(6, 2) /* index 921 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(926),
+  BITFIELD(8, 2) /* index 926 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(931),
+  BITFIELD(10, 2) /* index 931 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  TILEGX_OPC_INFOL,
+};
+
+static const unsigned short decode_X1_fsm[1266] =
+{
+  BITFIELD(53, 9) /* index 0 */,
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513),
+  CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI,
+  TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BEQZT,
+  TILEGX_OPC_BEQZT, TILEGX_OPC_BEQZ, TILEGX_OPC_BEQZ, TILEGX_OPC_BGEZT,
+  TILEGX_OPC_BGEZT, TILEGX_OPC_BGEZ, TILEGX_OPC_BGEZ, TILEGX_OPC_BGTZT,
+  TILEGX_OPC_BGTZT, TILEGX_OPC_BGTZ, TILEGX_OPC_BGTZ, TILEGX_OPC_BLBCT,
+  TILEGX_OPC_BLBCT, TILEGX_OPC_BLBC, TILEGX_OPC_BLBC, TILEGX_OPC_BLBST,
+  TILEGX_OPC_BLBST, TILEGX_OPC_BLBS, TILEGX_OPC_BLBS, TILEGX_OPC_BLEZT,
+  TILEGX_OPC_BLEZT, TILEGX_OPC_BLEZ, TILEGX_OPC_BLEZ, TILEGX_OPC_BLTZT,
+  TILEGX_OPC_BLTZT, TILEGX_OPC_BLTZ, TILEGX_OPC_BLTZ, TILEGX_OPC_BNEZT,
+  TILEGX_OPC_BNEZT, TILEGX_OPC_BNEZ, TILEGX_OPC_BNEZ, CHILD(528), CHILD(578),
+  CHILD(598), CHILD(703), CHILD(723), CHILD(728), CHILD(753), CHILD(758),
+  CHILD(763), CHILD(768), CHILD(773), CHILD(778), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL,
+  TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J,
+  CHILD(783), CHILD(800), CHILD(832), CHILD(849), CHILD(1168), CHILD(1185),
+  CHILD(1202), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1219), TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236),
+  CHILD(1236),
+  BITFIELD(37, 2) /* index 513 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518),
+  BITFIELD(39, 2) /* index 518 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523),
+  BITFIELD(41, 2) /* index 523 */,
+  TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI,
+  BITFIELD(51, 2) /* index 528 */,
+  TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548),
+  BITFIELD(37, 2) /* index 533 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538),
+  BITFIELD(39, 2) /* index 538 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543),
+  BITFIELD(41, 2) /* index 543 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(31, 2) /* index 548 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553),
+  BITFIELD(33, 2) /* index 553 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558),
+  BITFIELD(35, 2) /* index 558 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563),
+  BITFIELD(37, 2) /* index 563 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568),
+  BITFIELD(39, 2) /* index 568 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573),
+  BITFIELD(41, 2) /* index 573 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(51, 2) /* index 578 */,
+  TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, CHILD(583),
+  BITFIELD(31, 2) /* index 583 */,
+  TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(588),
+  BITFIELD(33, 2) /* index 588 */,
+  TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(593),
+  BITFIELD(35, 2) /* index 593 */,
+  TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L1_FAULT,
+  BITFIELD(51, 2) /* index 598 */,
+  CHILD(603), CHILD(618), CHILD(633), CHILD(648),
+  BITFIELD(31, 2) /* index 603 */,
+  TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(608),
+  BITFIELD(33, 2) /* index 608 */,
+  TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(613),
+  BITFIELD(35, 2) /* index 613 */,
+  TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L1,
+  BITFIELD(31, 2) /* index 618 */,
+  TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(623),
+  BITFIELD(33, 2) /* index 623 */,
+  TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(628),
+  BITFIELD(35, 2) /* index 628 */,
+  TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L2_FAULT,
+  BITFIELD(31, 2) /* index 633 */,
+  TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(638),
+  BITFIELD(33, 2) /* index 638 */,
+  TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(643),
+  BITFIELD(35, 2) /* index 643 */,
+  TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L2,
+  BITFIELD(31, 2) /* index 648 */,
+  CHILD(653), CHILD(653), CHILD(653), CHILD(673),
+  BITFIELD(43, 2) /* index 653 */,
+  CHILD(658), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  BITFIELD(45, 2) /* index 658 */,
+  CHILD(663), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  BITFIELD(47, 2) /* index 663 */,
+  CHILD(668), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  BITFIELD(49, 2) /* index 668 */,
+  TILEGX_OPC_LD4S_TLS, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD,
+  TILEGX_OPC_LD4S_ADD,
+  BITFIELD(33, 2) /* index 673 */,
+  CHILD(653), CHILD(653), CHILD(653), CHILD(678),
+  BITFIELD(35, 2) /* index 678 */,
+  CHILD(653), CHILD(653), CHILD(653), CHILD(683),
+  BITFIELD(43, 2) /* index 683 */,
+  CHILD(688), TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(45, 2) /* index 688 */,
+  CHILD(693), TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(47, 2) /* index 693 */,
+  CHILD(698), TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(49, 2) /* index 698 */,
+  TILEGX_OPC_LD4S_TLS, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT,
+  BITFIELD(51, 2) /* index 703 */,
+  CHILD(708), TILEGX_OPC_LDNT1S_ADD, TILEGX_OPC_LDNT1U_ADD,
+  TILEGX_OPC_LDNT2S_ADD,
+  BITFIELD(31, 2) /* index 708 */,
+  TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(713),
+  BITFIELD(33, 2) /* index 713 */,
+  TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(718),
+  BITFIELD(35, 2) /* index 718 */,
+  TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD,
+  TILEGX_OPC_PREFETCH_ADD_L3,
+  BITFIELD(51, 2) /* index 723 */,
+  TILEGX_OPC_LDNT2U_ADD, TILEGX_OPC_LDNT4S_ADD, TILEGX_OPC_LDNT4U_ADD,
+  TILEGX_OPC_LDNT_ADD,
+  BITFIELD(51, 2) /* index 728 */,
+  CHILD(733), TILEGX_OPC_LDNA_ADD, TILEGX_OPC_MFSPR, TILEGX_OPC_MTSPR,
+  BITFIELD(43, 2) /* index 733 */,
+  CHILD(738), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(45, 2) /* index 738 */,
+  CHILD(743), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(47, 2) /* index 743 */,
+  CHILD(748), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(49, 2) /* index 748 */,
+  TILEGX_OPC_LD_TLS, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD,
+  BITFIELD(51, 2) /* index 753 */,
+  TILEGX_OPC_ORI, TILEGX_OPC_ST1_ADD, TILEGX_OPC_ST2_ADD, TILEGX_OPC_ST4_ADD,
+  BITFIELD(51, 2) /* index 758 */,
+  TILEGX_OPC_STNT1_ADD, TILEGX_OPC_STNT2_ADD, TILEGX_OPC_STNT4_ADD,
+  TILEGX_OPC_STNT_ADD,
+  BITFIELD(51, 2) /* index 763 */,
+  TILEGX_OPC_ST_ADD, TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI,
+  TILEGX_OPC_V1CMPLTSI,
+  BITFIELD(51, 2) /* index 768 */,
+  TILEGX_OPC_V1CMPLTUI, TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI,
+  TILEGX_OPC_V2ADDI,
+  BITFIELD(51, 2) /* index 773 */,
+  TILEGX_OPC_V2CMPEQI, TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI,
+  TILEGX_OPC_V2MAXSI,
+  BITFIELD(51, 2) /* index 778 */,
+  TILEGX_OPC_V2MINSI, TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(49, 4) /* index 783 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD,
+  TILEGX_OPC_AND, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPEXCH4, TILEGX_OPC_CMPEXCH,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  TILEGX_OPC_CMPNE, TILEGX_OPC_DBLALIGN2, TILEGX_OPC_DBLALIGN4,
+  TILEGX_OPC_DBLALIGN6,
+  BITFIELD(49, 4) /* index 800 */,
+  TILEGX_OPC_EXCH4, TILEGX_OPC_EXCH, TILEGX_OPC_FETCHADD4,
+  TILEGX_OPC_FETCHADDGEZ4, TILEGX_OPC_FETCHADDGEZ, TILEGX_OPC_FETCHADD,
+  TILEGX_OPC_FETCHAND4, TILEGX_OPC_FETCHAND, TILEGX_OPC_FETCHOR4,
+  TILEGX_OPC_FETCHOR, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, TILEGX_OPC_NOR,
+  CHILD(817), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX,
+  BITFIELD(43, 2) /* index 817 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(822),
+  BITFIELD(45, 2) /* index 822 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(827),
+  BITFIELD(47, 2) /* index 827 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(49, 4) /* index 832 */,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL,
+  TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_ST1,
+  TILEGX_OPC_ST2, TILEGX_OPC_ST4, TILEGX_OPC_STNT1, TILEGX_OPC_STNT2,
+  TILEGX_OPC_STNT4,
+  BITFIELD(46, 7) /* index 849 */,
+  TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT,
+  TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT,
+  TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST,
+  TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_SUBXSC,
+  TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC,
+  TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBX,
+  TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX,
+  TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+  TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB,
+  TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, CHILD(978), CHILD(987),
+  CHILD(1066), CHILD(1150), CHILD(1159), TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC,
+  TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD,
+  TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD,
+  TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ,
+  TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES,
+  TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES,
+  TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU,
+  TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS,
+  TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU,
+  TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU,
+  TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE,
+  TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H,
+  TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+  TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+  TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L,
+  BITFIELD(43, 3) /* index 978 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_DRAIN, TILEGX_OPC_DTLBPR, TILEGX_OPC_FINV,
+  TILEGX_OPC_FLUSHWB, TILEGX_OPC_FLUSH, TILEGX_OPC_FNOP, TILEGX_OPC_ICOH,
+  BITFIELD(43, 3) /* index 987 */,
+  CHILD(996), TILEGX_OPC_INV, TILEGX_OPC_IRET, TILEGX_OPC_JALRP,
+  TILEGX_OPC_JALR, TILEGX_OPC_JRP, TILEGX_OPC_JR, CHILD(1051),
+  BITFIELD(31, 2) /* index 996 */,
+  CHILD(1001), CHILD(1026), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(33, 2) /* index 1001 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(1006),
+  BITFIELD(35, 2) /* index 1006 */,
+  TILEGX_OPC_ILL, CHILD(1011), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(37, 2) /* index 1011 */,
+  TILEGX_OPC_ILL, CHILD(1016), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(39, 2) /* index 1016 */,
+  TILEGX_OPC_ILL, CHILD(1021), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(41, 2) /* index 1021 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_BPT, TILEGX_OPC_ILL,
+  BITFIELD(33, 2) /* index 1026 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(1031),
+  BITFIELD(35, 2) /* index 1031 */,
+  TILEGX_OPC_ILL, CHILD(1036), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(37, 2) /* index 1036 */,
+  TILEGX_OPC_ILL, CHILD(1041), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(39, 2) /* index 1041 */,
+  TILEGX_OPC_ILL, CHILD(1046), TILEGX_OPC_ILL, TILEGX_OPC_ILL,
+  BITFIELD(41, 2) /* index 1046 */,
+  TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_RAISE, TILEGX_OPC_ILL,
+  BITFIELD(31, 2) /* index 1051 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1056),
+  BITFIELD(33, 2) /* index 1056 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1061),
+  BITFIELD(35, 2) /* index 1061 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S,
+  TILEGX_OPC_PREFETCH_L1_FAULT,
+  BITFIELD(43, 3) /* index 1066 */,
+  CHILD(1075), CHILD(1090), CHILD(1105), CHILD(1120), CHILD(1135),
+  TILEGX_OPC_LDNA, TILEGX_OPC_LDNT1S, TILEGX_OPC_LDNT1U,
+  BITFIELD(31, 2) /* index 1075 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1080),
+  BITFIELD(33, 2) /* index 1080 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1085),
+  BITFIELD(35, 2) /* index 1085 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH,
+  BITFIELD(31, 2) /* index 1090 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1095),
+  BITFIELD(33, 2) /* index 1095 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1100),
+  BITFIELD(35, 2) /* index 1100 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S,
+  TILEGX_OPC_PREFETCH_L2_FAULT,
+  BITFIELD(31, 2) /* index 1105 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1110),
+  BITFIELD(33, 2) /* index 1110 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1115),
+  BITFIELD(35, 2) /* index 1115 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2,
+  BITFIELD(31, 2) /* index 1120 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1125),
+  BITFIELD(33, 2) /* index 1125 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1130),
+  BITFIELD(35, 2) /* index 1130 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S,
+  TILEGX_OPC_PREFETCH_L3_FAULT,
+  BITFIELD(31, 2) /* index 1135 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1140),
+  BITFIELD(33, 2) /* index 1140 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1145),
+  BITFIELD(35, 2) /* index 1145 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3,
+  BITFIELD(43, 3) /* index 1150 */,
+  TILEGX_OPC_LDNT2S, TILEGX_OPC_LDNT2U, TILEGX_OPC_LDNT4S, TILEGX_OPC_LDNT4U,
+  TILEGX_OPC_LDNT, TILEGX_OPC_LD, TILEGX_OPC_LNK, TILEGX_OPC_MF,
+  BITFIELD(43, 3) /* index 1159 */,
+  TILEGX_OPC_NAP, TILEGX_OPC_NOP, TILEGX_OPC_SWINT0, TILEGX_OPC_SWINT1,
+  TILEGX_OPC_SWINT2, TILEGX_OPC_SWINT3, TILEGX_OPC_WH64, TILEGX_OPC_NONE,
+  BITFIELD(49, 4) /* index 1168 */,
+  TILEGX_OPC_V1MAXU, TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MZ,
+  TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC,
+  TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, TILEGX_OPC_V2ADD, TILEGX_OPC_V2CMPEQ,
+  TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, TILEGX_OPC_V2CMPLTS,
+  TILEGX_OPC_V2CMPLTU,
+  BITFIELD(49, 4) /* index 1185 */,
+  TILEGX_OPC_V2CMPNE, TILEGX_OPC_V2INT_H, TILEGX_OPC_V2INT_L,
+  TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, TILEGX_OPC_V2MZ,
+  TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC,
+  TILEGX_OPC_V2SHLSC, TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU,
+  TILEGX_OPC_V2SUBSC, TILEGX_OPC_V2SUB,
+  BITFIELD(49, 4) /* index 1202 */,
+  TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H,
+  TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC,
+  TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC,
+  TILEGX_OPC_V4SUB, TILEGX_OPC_XOR, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(49, 4) /* index 1219 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI,
+  TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI,
+  TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI,
+  TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE,
+  BITFIELD(31, 2) /* index 1236 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1241),
+  BITFIELD(33, 2) /* index 1241 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1246),
+  BITFIELD(35, 2) /* index 1246 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1251),
+  BITFIELD(37, 2) /* index 1251 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1256),
+  BITFIELD(39, 2) /* index 1256 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  CHILD(1261),
+  BITFIELD(41, 2) /* index 1261 */,
+  TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI,
+  TILEGX_OPC_INFOL,
+};
+
+static const unsigned short decode_Y0_fsm[178] =
+{
+  BITFIELD(27, 4) /* index 0 */,
+  CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI,
+  TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(118), CHILD(123),
+  CHILD(128), CHILD(133), CHILD(153), CHILD(158), CHILD(163), CHILD(168),
+  CHILD(173),
+  BITFIELD(6, 2) /* index 17 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22),
+  BITFIELD(8, 2) /* index 22 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27),
+  BITFIELD(10, 2) /* index 27 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(0, 2) /* index 32 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37),
+  BITFIELD(2, 2) /* index 37 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42),
+  BITFIELD(4, 2) /* index 42 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47),
+  BITFIELD(6, 2) /* index 47 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52),
+  BITFIELD(8, 2) /* index 52 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57),
+  BITFIELD(10, 2) /* index 57 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(18, 2) /* index 62 */,
+  TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+  BITFIELD(15, 5) /* index 67 */,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(100),
+  CHILD(109), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(12, 3) /* index 100 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_CLZ, TILEGX_OPC_CTZ, TILEGX_OPC_FNOP,
+  TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NOP, TILEGX_OPC_PCNT,
+  TILEGX_OPC_REVBITS,
+  BITFIELD(12, 3) /* index 109 */,
+  TILEGX_OPC_REVBYTES, TILEGX_OPC_TBLIDXB0, TILEGX_OPC_TBLIDXB1,
+  TILEGX_OPC_TBLIDXB2, TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  TILEGX_OPC_NONE,
+  BITFIELD(18, 2) /* index 118 */,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  BITFIELD(18, 2) /* index 123 */,
+  TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, TILEGX_OPC_MULAX, TILEGX_OPC_MULX,
+  BITFIELD(18, 2) /* index 128 */,
+  TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_MNZ, TILEGX_OPC_MZ,
+  BITFIELD(18, 2) /* index 133 */,
+  TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(138), TILEGX_OPC_XOR,
+  BITFIELD(12, 2) /* index 138 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(143),
+  BITFIELD(14, 2) /* index 143 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(148),
+  BITFIELD(16, 2) /* index 148 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(18, 2) /* index 153 */,
+  TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU,
+  BITFIELD(18, 2) /* index 158 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX,
+  TILEGX_OPC_SHL3ADDX,
+  BITFIELD(18, 2) /* index 163 */,
+  TILEGX_OPC_MUL_HS_HS, TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_LS_LS,
+  TILEGX_OPC_MUL_LU_LU,
+  BITFIELD(18, 2) /* index 168 */,
+  TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_LS_LS,
+  TILEGX_OPC_MULA_LU_LU,
+  BITFIELD(18, 2) /* index 173 */,
+  TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI,
+};
+
+static const unsigned short decode_Y1_fsm[167] =
+{
+  BITFIELD(58, 4) /* index 0 */,
+  TILEGX_OPC_NONE, CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI,
+  TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(117), CHILD(122),
+  CHILD(127), CHILD(132), CHILD(152), CHILD(157), CHILD(162), TILEGX_OPC_NONE,
+  BITFIELD(37, 2) /* index 17 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22),
+  BITFIELD(39, 2) /* index 22 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27),
+  BITFIELD(41, 2) /* index 27 */,
+  TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI,
+  BITFIELD(31, 2) /* index 32 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37),
+  BITFIELD(33, 2) /* index 37 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42),
+  BITFIELD(35, 2) /* index 42 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47),
+  BITFIELD(37, 2) /* index 47 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52),
+  BITFIELD(39, 2) /* index 52 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57),
+  BITFIELD(41, 2) /* index 57 */,
+  TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO,
+  BITFIELD(49, 2) /* index 62 */,
+  TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB,
+  BITFIELD(47, 4) /* index 67 */,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD,
+  TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD,
+  TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD,
+  TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(84),
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE,
+  BITFIELD(43, 3) /* index 84 */,
+  CHILD(93), CHILD(96), CHILD(99), CHILD(102), CHILD(105), CHILD(108),
+  CHILD(111), CHILD(114),
+  BITFIELD(46, 1) /* index 93 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_FNOP,
+  BITFIELD(46, 1) /* index 96 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_ILL,
+  BITFIELD(46, 1) /* index 99 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JALRP,
+  BITFIELD(46, 1) /* index 102 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JALR,
+  BITFIELD(46, 1) /* index 105 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JRP,
+  BITFIELD(46, 1) /* index 108 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_JR,
+  BITFIELD(46, 1) /* index 111 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_LNK,
+  BITFIELD(46, 1) /* index 114 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NOP,
+  BITFIELD(49, 2) /* index 117 */,
+  TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU,
+  BITFIELD(49, 2) /* index 122 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE,
+  BITFIELD(49, 2) /* index 127 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_MNZ, TILEGX_OPC_MZ,
+  BITFIELD(49, 2) /* index 132 */,
+  TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(137), TILEGX_OPC_XOR,
+  BITFIELD(43, 2) /* index 137 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(142),
+  BITFIELD(45, 2) /* index 142 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(147),
+  BITFIELD(47, 2) /* index 147 */,
+  TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE,
+  BITFIELD(49, 2) /* index 152 */,
+  TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU,
+  BITFIELD(49, 2) /* index 157 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX,
+  TILEGX_OPC_SHL3ADDX,
+  BITFIELD(49, 2) /* index 162 */,
+  TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI,
+};
+
+static const unsigned short decode_Y2_fsm[118] =
+{
+  BITFIELD(62, 2) /* index 0 */,
+  TILEGX_OPC_NONE, CHILD(5), CHILD(66), CHILD(109),
+  BITFIELD(55, 3) /* index 5 */,
+  CHILD(14), CHILD(14), CHILD(14), CHILD(17), CHILD(40), CHILD(40), CHILD(40),
+  CHILD(43),
+  BITFIELD(26, 1) /* index 14 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1U,
+  BITFIELD(26, 1) /* index 17 */,
+  CHILD(20), CHILD(30),
+  BITFIELD(51, 2) /* index 20 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(25),
+  BITFIELD(53, 2) /* index 25 */,
+  TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S,
+  TILEGX_OPC_PREFETCH_L1_FAULT,
+  BITFIELD(51, 2) /* index 30 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(35),
+  BITFIELD(53, 2) /* index 35 */,
+  TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH,
+  BITFIELD(26, 1) /* index 40 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2U,
+  BITFIELD(26, 1) /* index 43 */,
+  CHILD(46), CHILD(56),
+  BITFIELD(51, 2) /* index 46 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(51),
+  BITFIELD(53, 2) /* index 51 */,
+  TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S,
+  TILEGX_OPC_PREFETCH_L2_FAULT,
+  BITFIELD(51, 2) /* index 56 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(61),
+  BITFIELD(53, 2) /* index 61 */,
+  TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2,
+  BITFIELD(56, 2) /* index 66 */,
+  CHILD(71), CHILD(74), CHILD(90), CHILD(93),
+  BITFIELD(26, 1) /* index 71 */,
+  TILEGX_OPC_NONE, TILEGX_OPC_LD4S,
+  BITFIELD(26, 1) /* index 74 */,
+  TILEGX_OPC_NONE, CHILD(77),
+  BITFIELD(51, 2) /* index 77 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(82),
+  BITFIELD(53, 2) /* index 82 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(87),
+  BITFIELD(55, 1) /* index 87 */,
+  TILEGX_OPC_LD4S, TILEGX_OPC_PREFETCH_L3_FAULT,
+  BITFIELD(26, 1) /* index 90 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD,
+  BITFIELD(26, 1) /* index 93 */,
+  CHILD(96), TILEGX_OPC_LD,
+  BITFIELD(51, 2) /* index 96 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(101),
+  BITFIELD(53, 2) /* index 101 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(106),
+  BITFIELD(55, 1) /* index 106 */,
+  TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3,
+  BITFIELD(26, 1) /* index 109 */,
+  CHILD(112), CHILD(115),
+  BITFIELD(57, 1) /* index 112 */,
+  TILEGX_OPC_ST1, TILEGX_OPC_ST4,
+  BITFIELD(57, 1) /* index 115 */,
+  TILEGX_OPC_ST2, TILEGX_OPC_ST,
+};
+
+#undef BITFIELD
+#undef CHILD
+
+const unsigned short * const
+tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS] =
+{
+  decode_X0_fsm,
+  decode_X1_fsm,
+  decode_Y0_fsm,
+  decode_Y1_fsm,
+  decode_Y2_fsm
+};
+
+const struct tilegx_operand tilegx_operands[35] =
+{
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X0),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_X0, get_Imm8_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X1),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_X1, get_Imm8_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y0),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_Y0, get_Imm8_Y0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y1),
+    8, 1, 0, 0, 0, 0,
+    create_Imm8_Y1, get_Imm8_Y1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X0_HW0_LAST),
+    16, 1, 0, 0, 0, 0,
+    create_Imm16_X0, get_Imm16_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X1_HW0_LAST),
+    16, 1, 0, 0, 0, 0,
+    create_Imm16_X1, get_Imm16_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_X1, get_Dest_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_X1, get_SrcA_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_X0, get_Dest_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_X0, get_SrcA_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_Y0, get_Dest_Y0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_Y0, get_SrcA_Y0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_Dest_Y1, get_Dest_Y1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_Y1, get_SrcA_Y1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcA_Y2, get_SrcA_Y2
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 1, 0, 0,
+    create_SrcA_X1, get_SrcA_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_X0, get_SrcB_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_X1, get_SrcB_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_Y0, get_SrcB_Y0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcB_Y1, get_SrcB_Y1
+  },
+  {
+    TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_BROFF_X1),
+    17, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+    create_BrOff_X1, get_BrOff_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMSTART_X0),
+    6, 0, 0, 0, 0, 0,
+    create_BFStart_X0, get_BFStart_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMEND_X0),
+    6, 0, 0, 0, 0, 0,
+    create_BFEnd_X0, get_BFEnd_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 1, 0, 0,
+    create_Dest_X0, get_Dest_X0
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 1, 0, 0,
+    create_Dest_Y0, get_Dest_Y0
+  },
+  {
+    TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_JUMPOFF_X1),
+    27, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES,
+    create_JumpOff_X1, get_JumpOff_X1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 0, 1, 0, 0,
+    create_SrcBDest_Y2, get_SrcBDest_Y2
+  },
+  {
+    TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MF_IMM14_X1),
+    14, 0, 0, 0, 0, 0,
+    create_MF_Imm14_X1, get_MF_Imm14_X1
+  },
+  {
+    TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MT_IMM14_X1),
+    14, 0, 0, 0, 0, 0,
+    create_MT_Imm14_X1, get_MT_Imm14_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X0),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_X0, get_ShAmt_X0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X1),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_X1, get_ShAmt_X1
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y0),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_Y0, get_ShAmt_Y0
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y1),
+    6, 0, 0, 0, 0, 0,
+    create_ShAmt_Y1, get_ShAmt_Y1
+  },
+  {
+    TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE),
+    6, 0, 1, 0, 0, 0,
+    create_SrcBDest_Y2, get_SrcBDest_Y2
+  },
+  {
+    TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_DEST_IMM8_X1),
+    8, 1, 0, 0, 0, 0,
+    create_Dest_Imm8_X1, get_Dest_Imm8_X1
+  }
+};
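+
+/* Reading the entries above: judging by how parse_insn_tilegx() below
+ * consumes them, the numeric columns after the relocation are the field
+ * width in bits, an is-signed flag, source/destination register flags, a
+ * PC-relative flag, and a right-shift (scale) amount; the two function
+ * pointers encode and extract the field.  The branch-offset entry, for
+ * instance, is a signed 17-bit PC-relative field scaled by the bundle
+ * alignment.  (Field names here are inferred from usage in this file,
+ * not taken from the struct definition.) */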
+
+/* Given a set of bundle bits and a specific pipe, returns which
+ * instruction the bundle contains in that pipe.
+ */
+const struct tilegx_opcode *
+find_opcode(tilegx_bundle_bits bits, tilegx_pipeline pipe)
+{
+  const unsigned short *table = tilegx_bundle_decoder_fsms[pipe];
+  int index = 0;
+
+  while (1)
+  {
+    unsigned short bitspec = table[index];
+    unsigned int bitfield =
+      ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6);
+
+    unsigned short next = table[index + 1 + bitfield];
+    if (next <= TILEGX_OPC_NONE)
+      return &tilegx_opcodes[next];
+
+    index = next - TILEGX_OPC_NONE;
+  }
+}
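+
+/* Minimal usage sketch (illustrative; `bundle' stands for a 64-bit
+ * instruction bundle supplied by the caller):
+ *
+ *   const struct tilegx_opcode *opc =
+ *     find_opcode(bundle, TILEGX_PIPELINE_X1);
+ *
+ * Each loop iteration reads one FSM node: table[index] is the packed
+ * bitspec, and the extracted bitfield selects among the node's 2^size
+ * successors at table[index + 1 + bitfield].  A successor at or below
+ * TILEGX_OPC_NONE is a final opcode; anything larger is a CHILD() link
+ * to the next node.  Unmatched encodings land on TILEGX_OPC_NONE. */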
+
+int
+parse_insn_tilegx(tilegx_bundle_bits bits,
+                  unsigned long long pc,
+                  struct tilegx_decoded_instruction
+                  decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE])
+{
+  int num_instructions = 0;
+  int pipe;
+
+  int min_pipe, max_pipe;
+  if ((bits & TILEGX_BUNDLE_MODE_MASK) == 0)
+  {
+    min_pipe = TILEGX_PIPELINE_X0;
+    max_pipe = TILEGX_PIPELINE_X1;
+  }
+  else
+  {
+    min_pipe = TILEGX_PIPELINE_Y0;
+    max_pipe = TILEGX_PIPELINE_Y2;
+  }
+
+  /* For each pipe, find an instruction that fits. */
+  for (pipe = min_pipe; pipe <= max_pipe; pipe++)
+  {
+    const struct tilegx_opcode *opc;
+    struct tilegx_decoded_instruction *d;
+    int i;
+
+    d = &decoded[num_instructions++];
+    opc = find_opcode (bits, (tilegx_pipeline)pipe);
+    d->opcode = opc;
+
+    /* Decode each operand, sign extending, etc. as appropriate. */
+    for (i = 0; i < opc->num_operands; i++)
+    {
+      const struct tilegx_operand *op =
+        &tilegx_operands[opc->operands[pipe][i]];
+      int raw_opval = op->extract (bits);
+      long long opval;
+
+      if (op->is_signed)
+      {
+        /* Sign-extend the operand. */
+        int shift = (int)((sizeof(int) * 8) - op->num_bits);
+        raw_opval = (raw_opval << shift) >> shift;
+      }
+
+      /* Adjust PC-relative scaled branch offsets. */
+      if (op->type == TILEGX_OP_TYPE_ADDRESS)
+        opval = (raw_opval * TILEGX_BUNDLE_SIZE_IN_BYTES) + pc;
+      else
+        opval = raw_opval;
+
+      /* Record the final value. */
+      d->operands[i] = op;
+      d->operand_values[i] = opval;
+    }
+  }
+
+  return num_instructions;
+}
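+
+/* Usage sketch (illustrative only): decode one bundle and walk whatever
+ * the mode bit says it contains -- two X-pipe instructions or three
+ * Y-pipe instructions:
+ *
+ *   struct tilegx_decoded_instruction
+ *     decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
+ *   int i, n = parse_insn_tilegx(bundle, pc, decoded);
+ *   for (i = 0; i < n; i++)
+ *     handle(decoded[i].opcode, decoded[i].operand_values);
+ *
+ * `bundle', `pc' and handle() are placeholders supplied by the caller. */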
+
+struct tilegx_spr
+{
+  /* The SPR number. */
+  int number;
+
+  /* The symbolic SPR name. */
+  const char *name;
+};
+
+static int
+tilegx_spr_compare (const void *a_ptr, const void *b_ptr)
+{
+  const struct tilegx_spr *a = (const struct tilegx_spr *) a_ptr;
+  const struct tilegx_spr *b = (const struct tilegx_spr *) b_ptr;
+  return (a->number - b->number);
+}
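+
+/* The comparator is shaped for the C library's bsearch()/qsort(); a
+ * minimal lookup sketch, assuming the tilegx_sprs table below stays
+ * sorted by number (as it is here):
+ *
+ *   struct tilegx_spr key = { spr_number, 0 };
+ *   const struct tilegx_spr *spr = (const struct tilegx_spr *)
+ *     bsearch(&key, tilegx_sprs,
+ *             sizeof(tilegx_sprs) / sizeof(tilegx_sprs[0]),
+ *             sizeof(tilegx_sprs[0]), tilegx_spr_compare);
+ *
+ * spr is NULL when the number has no named entry. */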
+
+const struct tilegx_spr tilegx_sprs[] = {
+  { 0, "MPL_MEM_ERROR_SET_0" },
+  { 1, "MPL_MEM_ERROR_SET_1" },
+  { 2, "MPL_MEM_ERROR_SET_2" },
+  { 3, "MPL_MEM_ERROR_SET_3" },
+  { 4, "MPL_MEM_ERROR" },
+  { 5, "MEM_ERROR_CBOX_ADDR" },
+  { 6, "MEM_ERROR_CBOX_STATUS" },
+  { 7, "MEM_ERROR_ENABLE" },
+  { 8, "MEM_ERROR_MBOX_ADDR" },
+  { 9, "MEM_ERROR_MBOX_STATUS" },
+  { 10, "SBOX_ERROR" },
+  { 11, "XDN_DEMUX_ERROR" },
+  { 256, "MPL_SINGLE_STEP_3_SET_0" },
+  { 257, "MPL_SINGLE_STEP_3_SET_1" },
+  { 258, "MPL_SINGLE_STEP_3_SET_2" },
+  { 259, "MPL_SINGLE_STEP_3_SET_3" },
+  { 260, "MPL_SINGLE_STEP_3" },
+  { 261, "SINGLE_STEP_CONTROL_3" },
+  { 512, "MPL_SINGLE_STEP_2_SET_0" },
+  { 513, "MPL_SINGLE_STEP_2_SET_1" },
+  { 514, "MPL_SINGLE_STEP_2_SET_2" },
+  { 515, "MPL_SINGLE_STEP_2_SET_3" },
+  { 516, "MPL_SINGLE_STEP_2" },
+  { 517, "SINGLE_STEP_CONTROL_2" },
+  { 768, "MPL_SINGLE_STEP_1_SET_0" },
+  { 769, "MPL_SINGLE_STEP_1_SET_1" },
+  { 770, "MPL_SINGLE_STEP_1_SET_2" },
+  { 771, "MPL_SINGLE_STEP_1_SET_3" },
+  { 772, "MPL_SINGLE_STEP_1" },
+  { 773, "SINGLE_STEP_CONTROL_1" },
+  { 1024, "MPL_SINGLE_STEP_0_SET_0" },
+  { 1025, "MPL_SINGLE_STEP_0_SET_1" },
+  { 1026, "MPL_SINGLE_STEP_0_SET_2" },
+  { 1027, "MPL_SINGLE_STEP_0_SET_3" },
+  { 1028, "MPL_SINGLE_STEP_0" },
+  { 1029, "SINGLE_STEP_CONTROL_0" },
+  { 1280, "MPL_IDN_COMPLETE_SET_0" },
+  { 1281, "MPL_IDN_COMPLETE_SET_1" },
+  { 1282, "MPL_IDN_COMPLETE_SET_2" },
+  { 1283, "MPL_IDN_COMPLETE_SET_3" },
+  { 1284, "MPL_IDN_COMPLETE" },
+  { 1285, "IDN_COMPLETE_PENDING" },
+  { 1536, "MPL_UDN_COMPLETE_SET_0" },
+  { 1537, "MPL_UDN_COMPLETE_SET_1" },
+  { 1538, "MPL_UDN_COMPLETE_SET_2" },
+  { 1539, "MPL_UDN_COMPLETE_SET_3" },
+  { 1540, "MPL_UDN_COMPLETE" },
+  { 1541, "UDN_COMPLETE_PENDING" },
+  { 1792, "MPL_ITLB_MISS_SET_0" },
+  { 1793, "MPL_ITLB_MISS_SET_1" },
+  { 1794, "MPL_ITLB_MISS_SET_2" },
+  { 1795, "MPL_ITLB_MISS_SET_3" },
+  { 1796, "MPL_ITLB_MISS" },
+  { 1797, "ITLB_TSB_BASE_ADDR_0" },
+  { 1798, "ITLB_TSB_BASE_ADDR_1" },
+  { 1920, "ITLB_CURRENT_ATTR" },
+  { 1921, "ITLB_CURRENT_PA" },
+  { 1922, "ITLB_CURRENT_VA" },
+  { 1923, "ITLB_INDEX" },
+  { 1924, "ITLB_MATCH_0" },
+  { 1925, "ITLB_PERF" },
+  { 1926, "ITLB_PR" },
+  { 1927, "ITLB_TSB_ADDR_0" },
+  { 1928, "ITLB_TSB_ADDR_1" },
+  { 1929, "ITLB_TSB_FILL_CURRENT_ATTR" },
+  { 1930, "ITLB_TSB_FILL_MATCH" },
+  { 1931, "NUMBER_ITLB" },
+  { 1932, "REPLACEMENT_ITLB" },
+  { 1933, "WIRED_ITLB" },
+  { 2048, "MPL_ILL_SET_0" },
+  { 2049, "MPL_ILL_SET_1" },
+  { 2050, "MPL_ILL_SET_2" },
+  { 2051, "MPL_ILL_SET_3" },
+  { 2052, "MPL_ILL" },
+  { 2304, "MPL_GPV_SET_0" },
+  { 2305, "MPL_GPV_SET_1" },
+  { 2306, "MPL_GPV_SET_2" },
+  { 2307, "MPL_GPV_SET_3" },
+  { 2308, "MPL_GPV" },
+  { 2309, "GPV_REASON" },
+  { 2560, "MPL_IDN_ACCESS_SET_0" },
+  { 2561, "MPL_IDN_ACCESS_SET_1" },
+  { 2562, "MPL_IDN_ACCESS_SET_2" },
+  { 2563, "MPL_IDN_ACCESS_SET_3" },
+  { 2564, "MPL_IDN_ACCESS" },
+  { 2565, "IDN_DEMUX_COUNT_0" },
+  { 2566, "IDN_DEMUX_COUNT_1" },
+  { 2567, "IDN_FLUSH_EGRESS" },
+  { 2568, "IDN_PENDING" },
+  { 2569, "IDN_ROUTE_ORDER" },
+  { 2570, "IDN_SP_FIFO_CNT" },
+  { 2688, "IDN_DATA_AVAIL" },
+  { 2816, "MPL_UDN_ACCESS_SET_0" },
+  { 2817, "MPL_UDN_ACCESS_SET_1" },
+  { 2818, "MPL_UDN_ACCESS_SET_2" },
+  { 2819, "MPL_UDN_ACCESS_SET_3" },
+  { 2820, "MPL_UDN_ACCESS" },
+  { 2821, "UDN_DEMUX_COUNT_0" },
+  { 2822, "UDN_DEMUX_COUNT_1" },
+  { 2823, "UDN_DEMUX_COUNT_2" },
+  { 2824, "UDN_DEMUX_COUNT_3" },
+  { 2825, "UDN_FLUSH_EGRESS" },
+  { 2826, "UDN_PENDING" },
+  { 2827, "UDN_ROUTE_ORDER" },
+  { 2828, "UDN_SP_FIFO_CNT" },
+  { 2944, "UDN_DATA_AVAIL" },
+  { 3072, "MPL_SWINT_3_SET_0" },
+  { 3073, "MPL_SWINT_3_SET_1" },
+  { 3074, "MPL_SWINT_3_SET_2" },
+  { 3075, "MPL_SWINT_3_SET_3" },
+  { 3076, "MPL_SWINT_3" },
+  { 3328, "MPL_SWINT_2_SET_0" },
+  { 3329, "MPL_SWINT_2_SET_1" },
+  { 3330, "MPL_SWINT_2_SET_2" },
+  { 3331, "MPL_SWINT_2_SET_3" },
+  { 3332, "MPL_SWINT_2" },
+  { 3584, "MPL_SWINT_1_SET_0" },
+  { 3585, "MPL_SWINT_1_SET_1" },
+  { 3586, "MPL_SWINT_1_SET_2" },
+  { 3587, "MPL_SWINT_1_SET_3" },
+  { 3588, "MPL_SWINT_1" },
+  { 3840, "MPL_SWINT_0_SET_0" },
+  { 3841, "MPL_SWINT_0_SET_1" },
+  { 3842, "MPL_SWINT_0_SET_2" },
+  { 3843, "MPL_SWINT_0_SET_3" },
+  { 3844, "MPL_SWINT_0" },
+  { 4096, "MPL_ILL_TRANS_SET_0" },
+  { 4097, "MPL_ILL_TRANS_SET_1" },
+  { 4098, "MPL_ILL_TRANS_SET_2" },
+  { 4099, "MPL_ILL_TRANS_SET_3" },
+  { 4100, "MPL_ILL_TRANS" },
+  { 4101, "ILL_TRANS_REASON" },
+  { 4102, "ILL_VA_PC" },
+  { 4352, "MPL_UNALIGN_DATA_SET_0" },
+  { 4353, "MPL_UNALIGN_DATA_SET_1" },
+  { 4354, "MPL_UNALIGN_DATA_SET_2" },
+  { 4355, "MPL_UNALIGN_DATA_SET_3" },
+  { 4356, "MPL_UNALIGN_DATA" },
+  { 4608, "MPL_DTLB_MISS_SET_0" },
+  { 4609, "MPL_DTLB_MISS_SET_1" },
+  { 4610, "MPL_DTLB_MISS_SET_2" },
+  { 4611, "MPL_DTLB_MISS_SET_3" },
+  { 4612, "MPL_DTLB_MISS" },
+  { 4613, "DTLB_TSB_BASE_ADDR_0" },
+  { 4614, "DTLB_TSB_BASE_ADDR_1" },
+  { 4736, "AAR" },
+  { 4737, "CACHE_PINNED_WAYS" },
+  { 4738, "DTLB_BAD_ADDR" },
+  { 4739, "DTLB_BAD_ADDR_REASON" },
+  { 4740, "DTLB_CURRENT_ATTR" },
+  { 4741, "DTLB_CURRENT_PA" },
+  { 4742, "DTLB_CURRENT_VA" },
+  { 4743, "DTLB_INDEX" },
+  { 4744, "DTLB_MATCH_0" },
+  { 4745, "DTLB_PERF" },
+  { 4746, "DTLB_TSB_ADDR_0" },
+  { 4747, "DTLB_TSB_ADDR_1" },
+  { 4748, "DTLB_TSB_FILL_CURRENT_ATTR" },
+  { 4749, "DTLB_TSB_FILL_MATCH" },
+  { 4750, "NUMBER_DTLB" },
+  { 4751, "REPLACEMENT_DTLB" },
+  { 4752, "WIRED_DTLB" },
+  { 4864, "MPL_DTLB_ACCESS_SET_0" },
+  { 4865, "MPL_DTLB_ACCESS_SET_1" },
+  { 4866, "MPL_DTLB_ACCESS_SET_2" },
+  { 4867, "MPL_DTLB_ACCESS_SET_3" },
+  { 4868, "MPL_DTLB_ACCESS" },
+  { 5120, "MPL_IDN_FIREWALL_SET_0" },
+  { 5121, "MPL_IDN_FIREWALL_SET_1" },
+  { 5122, "MPL_IDN_FIREWALL_SET_2" },
+  { 5123, "MPL_IDN_FIREWALL_SET_3" },
+  { 5124, "MPL_IDN_FIREWALL" },
+  { 5125, "IDN_DIRECTION_PROTECT" },
+  { 5376, "MPL_UDN_FIREWALL_SET_0" },
+  { 5377, "MPL_UDN_FIREWALL_SET_1" },
+  { 5378, "MPL_UDN_FIREWALL_SET_2" },
+  { 5379, "MPL_UDN_FIREWALL_SET_3" },
+  { 5380, "MPL_UDN_FIREWALL" },
+  { 5381, "UDN_DIRECTION_PROTECT" },
+  { 5632, "MPL_TILE_TIMER_SET_0" },
+  { 5633, "MPL_TILE_TIMER_SET_1" },
+  { 5634, "MPL_TILE_TIMER_SET_2" },
+  { 5635, "MPL_TILE_TIMER_SET_3" },
+  { 5636, "MPL_TILE_TIMER" },
+  { 5637, "TILE_TIMER_CONTROL" },
+  { 5888, "MPL_AUX_TILE_TIMER_SET_0" },
+  { 5889, "MPL_AUX_TILE_TIMER_SET_1" },
+  { 5890, "MPL_AUX_TILE_TIMER_SET_2" },
+  { 5891, "MPL_AUX_TILE_TIMER_SET_3" },
+  { 5892, "MPL_AUX_TILE_TIMER" },
+  { 5893, "AUX_TILE_TIMER_CONTROL" },
+  { 6144, "MPL_IDN_TIMER_SET_0" },
+  { 6145, "MPL_IDN_TIMER_SET_1" },
+  { 6146, "MPL_IDN_TIMER_SET_2" },
+  { 6147, "MPL_IDN_TIMER_SET_3" },
+  { 6148, "MPL_IDN_TIMER" },
+  { 6149, "IDN_DEADLOCK_COUNT" },
+  { 6150, "IDN_DEADLOCK_TIMEOUT" },
+  { 6400, "MPL_UDN_TIMER_SET_0" },
+  { 6401, "MPL_UDN_TIMER_SET_1" },
+  { 6402, "MPL_UDN_TIMER_SET_2" },
+  { 6403, "MPL_UDN_TIMER_SET_3" },
+  { 6404, "MPL_UDN_TIMER" },
+  { 6405, "UDN_DEADLOCK_COUNT" },
+  { 6406, "UDN_DEADLOCK_TIMEOUT" },
+  { 6656, "MPL_IDN_AVAIL_SET_0" },
+  { 6657, "MPL_IDN_AVAIL_SET_1" },
+  { 6658, "MPL_IDN_AVAIL_SET_2" },
+  { 6659, "MPL_IDN_AVAIL_SET_3" },
+  { 6660, "MPL_IDN_AVAIL" },
+  { 6661, "IDN_AVAIL_EN" },
+  { 6912, "MPL_UDN_AVAIL_SET_0" },
+  { 6913, "MPL_UDN_AVAIL_SET_1" },
+  { 6914, "MPL_UDN_AVAIL_SET_2" },
+  { 6915, "MPL_UDN_AVAIL_SET_3" },
+  { 6916, "MPL_UDN_AVAIL" },
+  { 6917, "UDN_AVAIL_EN" },
+  { 7168, "MPL_IPI_3_SET_0" },
+  { 7169, "MPL_IPI_3_SET_1" },
+  { 7170, "MPL_IPI_3_SET_2" },
+  { 7171, "MPL_IPI_3_SET_3" },
+  { 7172, "MPL_IPI_3" },
+  { 7173, "IPI_EVENT_3" },
+  { 7174, "IPI_EVENT_RESET_3" },
+  { 7175, "IPI_EVENT_SET_3" },
+  { 7176, "IPI_MASK_3" },
+  { 7177, "IPI_MASK_RESET_3" },
+  { 7178, "IPI_MASK_SET_3" },
+  { 7424, "MPL_IPI_2_SET_0" },
+  { 7425, "MPL_IPI_2_SET_1" },
+  { 7426, "MPL_IPI_2_SET_2" },
+  { 7427, "MPL_IPI_2_SET_3" },
+  { 7428, "MPL_IPI_2" },
+  { 7429, "IPI_EVENT_2" },
+  { 7430, "IPI_EVENT_RESET_2" },
+  { 7431, "IPI_EVENT_SET_2" },
+  { 7432, "IPI_MASK_2" },
+  { 7433, "IPI_MASK_RESET_2" },
+  { 7434, "IPI_MASK_SET_2" },
+  { 7680, "MPL_IPI_1_SET_0" },
+  { 7681, "MPL_IPI_1_SET_1" },
+  { 7682, "MPL_IPI_1_SET_2" },
+  { 7683, "MPL_IPI_1_SET_3" },
+  { 7684, "MPL_IPI_1" },
+  { 7685, "IPI_EVENT_1" },
+  { 7686, "IPI_EVENT_RESET_1" },
+  { 7687, "IPI_EVENT_SET_1" },
+  { 7688, "IPI_MASK_1" },
+  { 7689, "IPI_MASK_RESET_1" },
+  { 7690, "IPI_MASK_SET_1" },
+  { 7936, "MPL_IPI_0_SET_0" },
+  { 7937, "MPL_IPI_0_SET_1" },
+  { 7938, "MPL_IPI_0_SET_2" },
+  { 7939, "MPL_IPI_0_SET_3" },
+  { 7940, "MPL_IPI_0" },
+  { 7941, "IPI_EVENT_0" },
+  { 7942, "IPI_EVENT_RESET_0" },
+  { 7943, "IPI_EVENT_SET_0" },
+  { 7944, "IPI_MASK_0" },
+  { 7945, "IPI_MASK_RESET_0" },
+  { 7946, "IPI_MASK_SET_0" },
+  { 8192, "MPL_PERF_COUNT_SET_0" },
+  { 8193, "MPL_PERF_COUNT_SET_1" },
+  { 8194, "MPL_PERF_COUNT_SET_2" },
+  { 8195, "MPL_PERF_COUNT_SET_3" },
+  { 8196, "MPL_PERF_COUNT" },
+  { 8197, "PERF_COUNT_0" },
+  { 8198, "PERF_COUNT_1" },
+  { 8199, "PERF_COUNT_CTL" },
+  { 8200, "PERF_COUNT_DN_CTL" },
+  { 8201, "PERF_COUNT_STS" },
+  { 8202, "WATCH_MASK" },
+  { 8203, "WATCH_VAL" },
+  { 8448, "MPL_AUX_PERF_COUNT_SET_0" },
+  { 8449, "MPL_AUX_PERF_COUNT_SET_1" },
+  { 8450, "MPL_AUX_PERF_COUNT_SET_2" },
+  { 8451, "MPL_AUX_PERF_COUNT_SET_3" },
+  { 8452, "MPL_AUX_PERF_COUNT" },
+  { 8453, "AUX_PERF_COUNT_0" },
+  { 8454, "AUX_PERF_COUNT_1" },
+  { 8455, "AUX_PERF_COUNT_CTL" },
+  { 8456, "AUX_PERF_COUNT_STS" },
+  { 8704, "MPL_INTCTRL_3_SET_0" },
+  { 8705, "MPL_INTCTRL_3_SET_1" },
+  { 8706, "MPL_INTCTRL_3_SET_2" },
+  { 8707, "MPL_INTCTRL_3_SET_3" },
+  { 8708, "MPL_INTCTRL_3" },
+  { 8709, "INTCTRL_3_STATUS" },
+  { 8710, "INTERRUPT_MASK_3" },
+  { 8711, "INTERRUPT_MASK_RESET_3" },
+  { 8712, "INTERRUPT_MASK_SET_3" },
+  { 8713, "INTERRUPT_VECTOR_BASE_3" },
+  { 8714, "SINGLE_STEP_EN_0_3" },
+  { 8715, "SINGLE_STEP_EN_1_3" },
+  { 8716, "SINGLE_STEP_EN_2_3" },
+  { 8717, "SINGLE_STEP_EN_3_3" },
+  { 8832, "EX_CONTEXT_3_0" },
+  { 8833, "EX_CONTEXT_3_1" },
+  { 8834, "SYSTEM_SAVE_3_0" },
+  { 8835, "SYSTEM_SAVE_3_1" },
+  { 8836, "SYSTEM_SAVE_3_2" },
+  { 8837, "SYSTEM_SAVE_3_3" },
+  { 8960, "MPL_INTCTRL_2_SET_0" },
+  { 8961, "MPL_INTCTRL_2_SET_1" },
+  { 8962, "MPL_INTCTRL_2_SET_2" },
+  { 8963, "MPL_INTCTRL_2_SET_3" },
+  { 8964, "MPL_INTCTRL_2" },
+  { 8965, "INTCTRL_2_STATUS" },
+  { 8966, "INTERRUPT_MASK_2" },
+  { 8967, "INTERRUPT_MASK_RESET_2" },
+  { 8968, "INTERRUPT_MASK_SET_2" },
+  { 8969, "INTERRUPT_VECTOR_BASE_2" },
+  { 8970, "SINGLE_STEP_EN_0_2" },
+  { 8971, "SINGLE_STEP_EN_1_2" },
+  { 8972, "SINGLE_STEP_EN_2_2" },
+  { 8973, "SINGLE_STEP_EN_3_2" },
+  { 9088, "EX_CONTEXT_2_0" },
+  { 9089, "EX_CONTEXT_2_1" },
+  { 9090, "SYSTEM_SAVE_2_0" },
+  { 9091, "SYSTEM_SAVE_2_1" },
+  { 9092, "SYSTEM_SAVE_2_2" },
+  { 9093, "SYSTEM_SAVE_2_3" },
+  { 9216, "MPL_INTCTRL_1_SET_0" },
+  { 9217, "MPL_INTCTRL_1_SET_1" },
+  { 9218, "MPL_INTCTRL_1_SET_2" },
+  { 9219, "MPL_INTCTRL_1_SET_3" },
+  { 9220, "MPL_INTCTRL_1" },
+  { 9221, "INTCTRL_1_STATUS" },
+  { 9222, "INTERRUPT_MASK_1" },
+  { 9223, "INTERRUPT_MASK_RESET_1" },
+  { 9224, "INTERRUPT_MASK_SET_1" },
+  { 9225, "INTERRUPT_VECTOR_BASE_1" },
+  { 9226, "SINGLE_STEP_EN_0_1" },
+  { 9227, "SINGLE_STEP_EN_1_1" },
+  { 9228, "SINGLE_STEP_EN_2_1" },
+  { 9229, "SINGLE_STEP_EN_3_1" },
+  { 9344, "EX_CONTEXT_1_0" },
+  { 9345, "EX_CONTEXT_1_1" },
+  { 9346, "SYSTEM_SAVE_1_0" },
+  { 9347, "SYSTEM_SAVE_1_1" },
+  { 9348, "SYSTEM_SAVE_1_2" },
+  { 9349, "SYSTEM_SAVE_1_3" },
+  { 9472, "MPL_INTCTRL_0_SET_0" },
+  { 9473, "MPL_INTCTRL_0_SET_1" },
+  { 9474, "MPL_INTCTRL_0_SET_2" },
+  { 9475, "MPL_INTCTRL_0_SET_3" },
+  { 9476, "MPL_INTCTRL_0" },
+  { 9477, "INTCTRL_0_STATUS" },
+  { 9478, "INTERRUPT_MASK_0" },
+  { 9479, "INTERRUPT_MASK_RESET_0" },
+  { 9480, "INTERRUPT_MASK_SET_0" },
+  { 9481, "INTERRUPT_VECTOR_BASE_0" },
+  { 9482, "SINGLE_STEP_EN_0_0" },
+  { 9483, "SINGLE_STEP_EN_1_0" },
+  { 9484, "SINGLE_STEP_EN_2_0" },
+  { 9485, "SINGLE_STEP_EN_3_0" },
+  { 9600, "EX_CONTEXT_0_0" },
+  { 9601, "EX_CONTEXT_0_1" },
+  { 9602, "SYSTEM_SAVE_0_0" },
+  { 9603, "SYSTEM_SAVE_0_1" },
+  { 9604, "SYSTEM_SAVE_0_2" },
+  { 9605, "SYSTEM_SAVE_0_3" },
+  { 9728, "MPL_BOOT_ACCESS_SET_0" },
+  { 9729, "MPL_BOOT_ACCESS_SET_1" },
+  { 9730, "MPL_BOOT_ACCESS_SET_2" },
+  { 9731, "MPL_BOOT_ACCESS_SET_3" },
+  { 9732, "MPL_BOOT_ACCESS" },
+  { 9733, "BIG_ENDIAN_CONFIG" },
+  { 9734, "CACHE_INVALIDATION_COMPRESSION_MODE" },
+  { 9735, "CACHE_INVALIDATION_MASK_0" },
+  { 9736, "CACHE_INVALIDATION_MASK_1" },
+  { 9737, "CACHE_INVALIDATION_MASK_2" },
+  { 9738, "CBOX_CACHEASRAM_CONFIG" },
+  { 9739, "CBOX_CACHE_CONFIG" },
+  { 9740, "CBOX_HOME_MAP_ADDR" },
+  { 9741, "CBOX_HOME_MAP_DATA" },
+  { 9742, "CBOX_MMAP_0" },
+  { 9743, "CBOX_MMAP_1" },
+  { 9744, "CBOX_MMAP_2" },
+  { 9745, "CBOX_MMAP_3" },
+  { 9746, "CBOX_MSR" },
+  { 9747, "DIAG_BCST_CTL" },
+  { 9748, "DIAG_BCST_MASK" },
+  { 9749, "DIAG_BCST_TRIGGER" },
+  { 9750, "DIAG_MUX_CTL" },
+  { 9751, "DIAG_TRACE_CTL" },
+  { 9752, "DIAG_TRACE_DATA" },
+  { 9753, "DIAG_TRACE_STS" },
+  { 9754, "IDN_DEMUX_BUF_THRESH" },
+  { 9755, "L1_I_PIN_WAY_0" },
+  { 9756, "MEM_ROUTE_ORDER" },
+  { 9757, "MEM_STRIPE_CONFIG" },
+  { 9758, "PERF_COUNT_PLS" },
+  { 9759, "PSEUDO_RANDOM_NUMBER_MODIFY" },
+  { 9760, "QUIESCE_CTL" },
+  { 9761, "RSHIM_COORD" },
+  { 9762, "SBOX_CONFIG" },
+  { 9763, "UDN_DEMUX_BUF_THRESH" },
+  { 9764, "XDN_CORE_STARVATION_COUNT" },
+  { 9765, "XDN_ROUND_ROBIN_ARB_CTL" },
+  { 9856, "CYCLE_MODIFY" },
+  { 9857, "I_AAR" },
+  { 9984, "MPL_WORLD_ACCESS_SET_0" },
+  { 9985, "MPL_WORLD_ACCESS_SET_1" },
+  { 9986, "MPL_WORLD_ACCESS_SET_2" },
+  { 9987, "MPL_WORLD_ACCESS_SET_3" },
+  { 9988, "MPL_WORLD_ACCESS" },
+  { 9989, "DONE" },
+  { 9990, "DSTREAM_PF" },
+  { 9991, "FAIL" },
+  { 9992, "INTERRUPT_CRITICAL_SECTION" },
+  { 9993, "PASS" },
+  { 9994, "PSEUDO_RANDOM_NUMBER" },
+  { 9995, "TILE_COORD" },
+  { 9996, "TILE_RTF_HWM" },
+  { 10112, "CMPEXCH_VALUE" },
+  { 10113, "CYCLE" },
+  { 10114, "EVENT_BEGIN" },
+  { 10115, "EVENT_END" },
+  { 10116, "PROC_STATUS" },
+  { 10117, "SIM_CONTROL" },
+  { 10118, "SIM_SOCKET" },
+  { 10119, "STATUS_SATURATE" },
+  { 10240, "MPL_I_ASID_SET_0" },
+  { 10241, "MPL_I_ASID_SET_1" },
+  { 10242, "MPL_I_ASID_SET_2" },
+  { 10243, "MPL_I_ASID_SET_3" },
+  { 10244, "MPL_I_ASID" },
+  { 10245, "I_ASID" },
+  { 10496, "MPL_D_ASID_SET_0" },
+  { 10497, "MPL_D_ASID_SET_1" },
+  { 10498, "MPL_D_ASID_SET_2" },
+  { 10499, "MPL_D_ASID_SET_3" },
+  { 10500, "MPL_D_ASID" },
+  { 10501, "D_ASID" },
+  { 10752, "MPL_DOUBLE_FAULT_SET_0" },
+  { 10753, "MPL_DOUBLE_FAULT_SET_1" },
+  { 10754, "MPL_DOUBLE_FAULT_SET_2" },
+  { 10755, "MPL_DOUBLE_FAULT_SET_3" },
+  { 10756, "MPL_DOUBLE_FAULT" },
+  { 10757, "LAST_INTERRUPT_REASON" },
+};
+
+const int tilegx_num_sprs = 441;
+
+const char *
+get_tilegx_spr_name (int num)
+{
+  void *result;
+  struct tilegx_spr key;
+
+  key.number = num;
+  result = bsearch((const void *) &key, (const void *) tilegx_sprs,
+                   tilegx_num_sprs, sizeof (struct tilegx_spr),
+                   tilegx_spr_compare);
+
+  if (result == NULL)
+  {
+    return (NULL);
+  }
+  else
+  {
+    struct tilegx_spr *result_ptr = (struct tilegx_spr *) result;
+    return (result_ptr->name);
+  }
+}
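+
+/* Illustrative lookup, using a value from the table above:
+   get_tilegx_spr_name(9993) returns "PASS"; an unlisted number
+   returns NULL, since bsearch finds no matching entry. */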
+
+int
+print_insn_tilegx (unsigned char * memaddr)
+{
+  struct tilegx_decoded_instruction
+    decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
+  unsigned char opbuf[TILEGX_BUNDLE_SIZE_IN_BYTES];
+  int i, num_instructions, num_printed;
+  tilegx_mnemonic padding_mnemonic;
+
+  memcpy((void *)opbuf, (void *)memaddr, TILEGX_BUNDLE_SIZE_IN_BYTES);
+
+  /* Parse the instructions in the bundle. */
+  num_instructions =
+    parse_insn_tilegx (*(unsigned long long *)opbuf, (unsigned long long)memaddr, decoded);
+
+  /* Print the instructions in the bundle. */
+  printf("{ ");
+  num_printed = 0;
+
+  /* Determine which nop opcode is used for padding and should be skipped. */
+  padding_mnemonic = TILEGX_OPC_FNOP;
+  for (i = 0; i < num_instructions; i++)
+  {
+    if (!decoded[i].opcode->can_bundle)
+    {
+      /* Instructions that cannot be bundled are padded out with nops,
+         rather than fnops. Displaying them is always clutter. */
+      padding_mnemonic = TILEGX_OPC_NOP;
+      break;
+    }
+  }
+
+  for (i = 0; i < num_instructions; i++)
+  {
+    const struct tilegx_opcode *opcode = decoded[i].opcode;
+    const char *name;
+    int j;
+
+    /* Do not print out fnops, unless everything is an fnop, in
+       which case we will print out just the last one. */
+    if (opcode->mnemonic == padding_mnemonic
+        && (num_printed > 0 || i + 1 < num_instructions))
+      continue;
+
+    if (num_printed > 0)
+      printf(" ; ");
+    ++num_printed;
+
+    name = opcode->name;
+    if (name == NULL)
+      name = "<invalid>";
+    printf("%s", name);
+
+    for (j = 0; j < opcode->num_operands; j++)
+    {
+      unsigned long long num;
+      const struct tilegx_operand *op;
+      const char *spr_name;
+
+      if (j > 0)
+        printf (",");
+      printf (" ");
+
+      num = decoded[i].operand_values[j];
+
+      op = decoded[i].operands[j];
+      switch (op->type)
+      {
+      case TILEGX_OP_TYPE_REGISTER:
+        printf ("%s", tilegx_register_names[(int)num]);
+        break;
+      case TILEGX_OP_TYPE_SPR:
+        spr_name = get_tilegx_spr_name(num);
+        if (spr_name != NULL)
+          printf ("%s", spr_name);
+        else
+          printf ("%d", (int)num);
+        break;
+      case TILEGX_OP_TYPE_IMMEDIATE:
+        printf ("%d", (int)num);
+        break;
+      case TILEGX_OP_TYPE_ADDRESS:
+        printf ("0x%016llx", num);
+        break;
+      default:
+        abort ();
+      }
+    }
+  }
+  printf (" }\n");
+
+  return TILEGX_BUNDLE_SIZE_IN_BYTES;
+}
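+
+/* Illustrative use (a sketch; addr is assumed to point at readable,
+   bundle-aligned code):
+
+     addr += print_insn_tilegx(addr);
+
+   prints one bundle and advances by the bundle size it returns. */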
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c b/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c
new file mode 100644 (file)
index 0000000..1d6aa5a
--- /dev/null
@@ -0,0 +1,2574 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* TileGX architecture. */
+/* Contributed by Tilera Corporation. */
+#include "sljitNativeTILEGX-encoder.c"
+
+#define SIMM_8BIT_MAX (0x7f)
+#define SIMM_8BIT_MIN (-0x80)
+#define SIMM_16BIT_MAX (0x7fff)
+#define SIMM_16BIT_MIN (-0x8000)
+#define SIMM_17BIT_MAX (0xffff)
+#define SIMM_17BIT_MIN (-0x10000)
+#define SIMM_32BIT_MIN (-0x80000000L)
+#define SIMM_32BIT_MAX (0x7fffffff)
+#define SIMM_48BIT_MIN (-0x800000000000L)
+#define SIMM_48BIT_MAX (0x7fffffff0000L)
+#define IMM16(imm) ((imm) & 0xffff)
+
+#define UIMM_16BIT_MAX (0xffff)
+
+#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
+#define ADDR_TMP (SLJIT_NO_REGISTERS + 4)
+#define PIC_ADDR_REG TMP_REG2
+
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
+       63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
+};
+
+#define SLJIT_LOCALS_REG_mapped 54
+#define TMP_REG1_mapped 5
+#define TMP_REG2_mapped 16
+#define TMP_REG3_mapped 6
+#define ADDR_TMP_mapped 7
+#define SLJIT_SAVED_REG1_mapped 30
+#define SLJIT_SAVED_REG2_mapped 31
+#define SLJIT_SAVED_REG3_mapped 32
+#define SLJIT_SAVED_EREG1_mapped 33
+#define SLJIT_SAVED_EREG2_mapped 34
+
+/* Flags are kept in volatile registers. */
+#define EQUAL_FLAG 8
+/* And carry flag as well. */
+#define ULESS_FLAG 9
+#define UGREATER_FLAG 10
+#define LESS_FLAG 11
+#define GREATER_FLAG 12
+#define OVERFLOW_FLAG 13
+
+#define ZERO 63
+#define RA 55
+#define TMP_EREG1 14
+#define TMP_EREG2 15
+
+#define LOAD_DATA 0x01
+#define WORD_DATA 0x00
+#define BYTE_DATA 0x02
+#define HALF_DATA 0x04
+#define INT_DATA 0x06
+#define SIGNED_DATA 0x08
+#define DOUBLE_DATA 0x10
+
+/* Separates integer and floating point registers */
+#define GPR_REG 0xf
+
+#define MEM_MASK 0x1f
+
+#define WRITE_BACK 0x00020
+#define ARG_TEST 0x00040
+#define ALT_KEEP_CACHE 0x00080
+#define CUMULATIVE_OP 0x00100
+#define LOGICAL_OP 0x00200
+#define IMM_OP 0x00400
+#define SRC2_IMM 0x00800
+
+#define UNUSED_DEST 0x01000
+#define REG_DEST 0x02000
+#define REG1_SOURCE 0x04000
+#define REG2_SOURCE 0x08000
+#define SLOW_SRC1 0x10000
+#define SLOW_SRC2 0x20000
+#define SLOW_DEST 0x40000
+
+/* Only these flags are set. UNUSED_DEST is not set when no flags should be set.
+ */
+#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void)
+{
+       return "TileGX" SLJIT_CPUINFO;
+}
+
+/* Length of an instruction word */
+typedef sljit_uw sljit_ins;
+
+struct jit_instr {
+       const struct tilegx_opcode* opcode; 
+       tilegx_pipeline pipe;
+       unsigned long input_registers;
+       unsigned long output_registers;
+       int operand_value[4];
+       int line;
+};
+
+/* Opcode Helper Macros */
+#define TILEGX_X_MODE 0
+
+#define X_MODE create_Mode(TILEGX_X_MODE)
+
+#define FNOP_X0 \
+       create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
+       create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0)
+
+#define FNOP_X1 \
+       create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1)
+
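+/* A full no-op bundle: X mode with an fnop issued in both the X0 and
+   X1 slots. */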
+#define NOP \
+       create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1
+
+#define ANOP_X0 \
+       create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
+       create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0)
+
+#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
+       create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0
+
+#define ADD_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define ADDI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define SUB_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define NOR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define OR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define AND_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define XOR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define CMOVNEZ_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1
+
+#define CMOVEQZ_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1
+
+#define ADDLI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0
+
+#define V4INT_L_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define BFEXTU_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
+       create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1
+
+#define BFEXTS_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
+       create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1
+
+#define SHL16INSLI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0
+
+#define ST_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0
+
+#define LD_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0
+
+#define JR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0
+
+#define JALR_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
+       create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0
+
+#define CLZ_X0 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
+       create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
+       create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1
+
+#define CMPLTUI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define CMPLTU_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define CMPLTS_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define XORI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define ORI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define ANDI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
+       create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0
+
+#define SHLI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
+       create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0
+
+#define SHL_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define SHRSI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
+       create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0
+
+#define SHRS_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define SHRUI_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
+       create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0
+
+#define SHRU_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
+       create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0
+
+#define BEQZ_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
+       create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0
+
+#define BNEZ_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
+       create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0
+
+#define J_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
+       create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0
+
+#define JAL_X1 \
+       create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
+       create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0
+
+#define DEST_X0(x) create_Dest_X0(x)
+#define SRCA_X0(x) create_SrcA_X0(x)
+#define SRCB_X0(x) create_SrcB_X0(x)
+#define DEST_X1(x) create_Dest_X1(x)
+#define SRCA_X1(x) create_SrcA_X1(x)
+#define SRCB_X1(x) create_SrcB_X1(x)
+#define IMM16_X1(x) create_Imm16_X1(x)
+#define IMM8_X1(x) create_Imm8_X1(x)
+#define BFSTART_X0(x) create_BFStart_X0(x)
+#define BFEND_X0(x) create_BFEnd_X0(x)
+#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
+#define JOFF_X1(x) create_JumpOff_X1(x)
+#define BOFF_X1(x) create_BrOff_X1(x)
+
+static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = {
+       /* u w s */ TILEGX_OPC_ST   /* st */,
+       /* u w l */ TILEGX_OPC_LD   /* ld */,
+       /* u b s */ TILEGX_OPC_ST1  /* st1 */,
+       /* u b l */ TILEGX_OPC_LD1U /* ld1u */,
+       /* u h s */ TILEGX_OPC_ST2  /* st2 */,
+       /* u h l */ TILEGX_OPC_LD2U /* ld2u */,
+       /* u i s */ TILEGX_OPC_ST4  /* st4 */,
+       /* u i l */ TILEGX_OPC_LD4U /* ld4u */,
+       /* s w s */ TILEGX_OPC_ST   /* st */,
+       /* s w l */ TILEGX_OPC_LD   /* ld */,
+       /* s b s */ TILEGX_OPC_ST1  /* st1 */,
+       /* s b l */ TILEGX_OPC_LD1S /* ld1s */,
+       /* s h s */ TILEGX_OPC_ST2  /* st2 */,
+       /* s h l */ TILEGX_OPC_LD2S /* ld2s */,
+       /* s i s */ TILEGX_OPC_ST4  /* st4 */,
+       /* s i l */ TILEGX_OPC_LD4S /* ld4s */,
+};
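+
+/* The table above is indexed by (flags & MEM_MASK): bit 0 selects load
+   vs. store, bits 1-2 the access width and bit 3 signedness, matching
+   the u/s, w/b/h/i and s/l markers on each entry. */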
+
+#ifdef TILEGX_JIT_DEBUG
+static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
+{
+       sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       printf("|%04d|S0|:\t\t", line);
+       print_insn_tilegx(ptr);
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       return SLJIT_SUCCESS;
+}
+
+#define push_inst(a, b) push_inst_debug(a, b, __LINE__)
+#else
+static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+       sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
+       FAIL_IF(!ptr);
+       *ptr = ins;
+       compiler->size++;
+       return SLJIT_SUCCESS;
+}
+#endif
+
+#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
+       ((p0) | ((p1) << 8) | ((p2) << 16))
+
+#define BUNDLE_FORMAT(p0, p1, p2) \
+       { \
+               { \
+                       (tilegx_pipeline)(p0), \
+                       (tilegx_pipeline)(p1), \
+                       (tilegx_pipeline)(p2) \
+               }, \
+               BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
+       }
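+
+/* For instance, BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1,
+   NO_PIPELINE) yields pipe[] = { X0, X1, NO_PIPELINE } and a pipe_mask
+   with one bit set per slot, each slot's bit kept in its own byte. */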
+
+#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS
+
+#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)
+
+#define PI(encoding) \
+       push_inst(compiler, encoding)
+
+#define PB3(opcode, dst, srca, srcb) \
+       push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)
+
+#define PB2(opcode, dst, src) \
+       push_2_buffer(compiler, opcode, dst, src, __LINE__)
+
+#define JR(reg) \
+       push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)
+
+#define ADD(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__)
+
+#define SUB(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)
+
+#define NOR(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)
+
+#define OR(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__)
+
+#define XOR(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__)
+
+#define AND(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__)
+
+#define CLZ(dst, src) \
+       push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__)
+
+#define SHLI(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__)
+
+#define SHRUI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__)
+
+#define XORI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__)
+
+#define ORI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__)
+
+#define CMPLTU(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__)
+
+#define CMPLTS(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__)
+
+#define CMPLTUI(dst, srca, imm) \
+       push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__)
+
+#define CMOVNEZ(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__)
+
+#define CMOVEQZ(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__)
+
+#define ADDLI(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__)
+
+#define SHL16INSLI(dst, srca, srcb) \
+       push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__)
+
+#define LD_ADD(dst, addr, adjust) \
+       push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__)
+
+#define ST_ADD(src, addr, adjust) \
+       push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__)
+
+#define LD(dst, addr) \
+       push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__)
+
+#define BFEXTU(dst, src, start, end) \
+       push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)
+
+#define BFEXTS(dst, src, start, end) \
+       push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)
+
+#define ADD_SOLO(dest, srca, srcb) \
+       push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))
+
+#define ADDI_SOLO(dest, srca, imm) \
+       push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))
+
+#define ADDLI_SOLO(dest, srca, imm) \
+       push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))
+
+#define SHL16INSLI_SOLO(dest, srca, imm) \
+       push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))
+
+#define JALR_SOLO(reg) \
+       push_inst(compiler, JALR_X1 | SRCA_X1(reg))
+
+#define JR_SOLO(reg) \
+       push_inst(compiler, JR_X1 | SRCA_X1(reg))
+
+struct Format {
+       /* Mapping of bundle issue slot to assigned pipe. */
+       tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
+
+       /* Mask of pipes used by this bundle. */
+       unsigned int pipe_mask;
+};
+
+const struct Format formats[] =
+{
+       /* In Y format we must always have something in Y2, since it has
+       * no fnop; these entries convey that Y2 must always be used. */
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),
+
+       /* Y format has three instructions. */
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),
+
+       /* X format has only two instructions. */
+       BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
+       BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
+};
+
+struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
+unsigned long inst_buf_index;
+
+tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
+{
+       /* FIXME: tile: we could pregenerate this. */
+       int pipe;
+       for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++)
+               ;
+       return (tilegx_pipeline)(pipe);
+}
+
+void insert_nop(tilegx_mnemonic opc, int line)
+{
+       const struct tilegx_opcode* opcode = NULL;
+
+       memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]);
+
+       opcode = &tilegx_opcodes[opc];
+       inst_buf[0].opcode = opcode;
+       inst_buf[0].pipe = get_any_valid_pipe(opcode);
+       inst_buf[0].input_registers = 0;
+       inst_buf[0].output_registers = 0;
+       inst_buf[0].line = line;
+       ++inst_buf_index;
+}
+
+const struct Format* compute_format()
+{
+       unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
+               inst_buf[0].opcode->pipes,
+               inst_buf[1].opcode->pipes,
+               (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));
+
+       const struct Format* match = NULL;
+       const struct Format *b = NULL;
+       unsigned int i = 0;
+       for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
+               b = &formats[i];
+               if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
+                       match = b;
+                       break;
+               }
+       }
+
+       return match;
+}
+
+sljit_si assign_pipes()
+{
+       unsigned long output_registers = 0;
+       unsigned int i = 0;
+
+       if (inst_buf_index == 1) {
+               tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
+                                       ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
+               insert_nop(opc, __LINE__);
+       }
+
+       const struct Format* match = compute_format();
+
+       if (match == NULL)
+               return -1;
+
+       for (i = 0; i < inst_buf_index; i++) {
+
+               if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
+                       return -1;
+
+               if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
+                       return -1;
+
+               /* Don't include Rzero in the match set, to avoid triggering
+                  needlessly on 'prefetch' instrs. */
+
+               output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;
+
+               inst_buf[i].pipe = match->pipe[i];
+       }
+
+       /* If only 2 instrs, and in Y-mode, insert a nop. */
+       if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
+               insert_nop(TILEGX_OPC_FNOP, __LINE__);
+
+               /* Select the yet unassigned pipe. */
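+               /* The three Y pipe encodings sum to a constant, so the
+                  pipe not yet in use is that sum minus the two pipes
+                  already assigned. */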
+               tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
+                                       + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
+                                       - (inst_buf[1].pipe + inst_buf[2].pipe)));
+
+               inst_buf[0].pipe = pipe;
+       }
+
+       return 0;
+}
+
+tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
+{
+       int i, val;
+       const struct tilegx_opcode* opcode = inst->opcode;
+       tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe];
+
+       const struct tilegx_operand* operand = NULL;
+       for (i = 0; i < opcode->num_operands; i++) {
+               operand = &tilegx_operands[opcode->operands[inst->pipe][i]];
+               val = inst->operand_value[i];
+
+               bits |= operand->insert(val);
+       }
+
+       return bits;
+}
+
+static sljit_si update_buffer(struct sljit_compiler *compiler)
+{
+       int count;
+       int i;
+       int orig_index = inst_buf_index;
+       struct jit_instr inst0 = inst_buf[0];
+       struct jit_instr inst1 = inst_buf[1];
+       struct jit_instr inst2 = inst_buf[2];
+       tilegx_bundle_bits bits = 0;
+
+       /* If the bundle is valid as is, encode it and emit it. */
+       if (assign_pipes() == 0) {
+               for (i = 0; i < inst_buf_index; i++) {
+                       bits |= get_bundle_bit(inst_buf + i);
+#ifdef TILEGX_JIT_DEBUG
+                       printf("|%04d", inst_buf[i].line);
+#endif
+               }
+#ifdef TILEGX_JIT_DEBUG
+               if (inst_buf_index == 3)
+                       printf("|M0|:\t");
+               else
+                       printf("|M0|:\t\t");
+               print_insn_tilegx(&bits);
+#endif
+
+               inst_buf_index = 0;
+
+#ifdef TILEGX_JIT_DEBUG
+               return push_inst_nodebug(compiler, bits);
+#else
+               return push_inst(compiler, bits);
+#endif
+       }
+
+       /* If the bundle is invalid, split it in two. First encode the first two
+          (or possibly 1) instructions, and then the last, separately. Note that
+          assign_pipes may have re-ordered the instrs (by inserting no-ops in
+          lower slots) so we need to reset them. */
+
+       inst_buf_index = orig_index - 1;
+       inst_buf[0] = inst0;
+       inst_buf[1] = inst1;
+       inst_buf[2] = inst2;
+       if (assign_pipes() == 0) {
+               for (i = 0; i < inst_buf_index; i++) {
+                       bits |= get_bundle_bit(inst_buf + i);
+#ifdef TILEGX_JIT_DEBUG
+                       printf("|%04d", inst_buf[i].line);
+#endif
+               }
+
+#ifdef TILEGX_JIT_DEBUG
+               if (inst_buf_index == 3)
+                       printf("|M1|:\t");
+               else
+                       printf("|M1|:\t\t");
+               print_insn_tilegx(&bits);
+#endif
+
+               if ((orig_index - 1) == 2) {
+                       inst_buf[0] = inst2;
+                       inst_buf_index = 1;
+               } else if ((orig_index - 1) == 1) {
+                       inst_buf[0] = inst1;
+                       inst_buf_index = 1;
+               } else
+                       SLJIT_ASSERT_STOP();
+
+#ifdef TILEGX_JIT_DEBUG
+               return push_inst_nodebug(compiler, bits);
+#else
+               return push_inst(compiler, bits);
+#endif
+       } else {
+               /* We had 3 instrs of which the first 2 can't live in the same bundle.
+                  Split those two. Note that we don't try to then combine the second
+                  and third instr into a single bundle.  First instruction: */
+               inst_buf_index = 1;
+               inst_buf[0] = inst0;
+               inst_buf[1] = inst1;
+               inst_buf[2] = inst2;
+               if (assign_pipes() == 0) {
+                       for (i = 0; i < inst_buf_index; i++) {
+                               bits |= get_bundle_bit(inst_buf + i);
+#ifdef TILEGX_JIT_DEBUG
+                               printf("|%04d", inst_buf[i].line);
+#endif
+                       }
+
+#ifdef TILEGX_JIT_DEBUG
+                       if (inst_buf_index == 3)
+                               printf("|M2|:\t");
+                       else
+                               printf("|M2|:\t\t");
+                       print_insn_tilegx(&bits);
+#endif
+
+                       inst_buf[0] = inst1;
+                       inst_buf[1] = inst2;
+                       inst_buf_index = orig_index - 1;
+#ifdef TILEGX_JIT_DEBUG
+                       return push_inst_nodebug(compiler, bits);
+#else
+                       return push_inst(compiler, bits);
+#endif
+               } else
+                       SLJIT_ASSERT_STOP();
+       }
+
+       SLJIT_ASSERT_STOP();
+}
+
+static sljit_si flush_buffer(struct sljit_compiler *compiler)
+{
+       while (inst_buf_index != 0)
+               FAIL_IF(update_buffer(compiler));
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
+{
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       inst_buf[inst_buf_index].opcode = opcode;
+       inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
+       inst_buf[inst_buf_index].operand_value[0] = op0;
+       inst_buf[inst_buf_index].operand_value[1] = op1;
+       inst_buf[inst_buf_index].operand_value[2] = op2;
+       inst_buf[inst_buf_index].operand_value[3] = op3;
+       inst_buf[inst_buf_index].input_registers = 1L << op1;
+       inst_buf[inst_buf_index].output_registers = 1L << op0;
+       inst_buf[inst_buf_index].line = line;
+       inst_buf_index++;
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
+{
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       inst_buf[inst_buf_index].opcode = opcode;
+       inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
+       inst_buf[inst_buf_index].operand_value[0] = op0;
+       inst_buf[inst_buf_index].operand_value[1] = op1;
+       inst_buf[inst_buf_index].operand_value[2] = op2;
+       inst_buf[inst_buf_index].line = line;
+
+       switch (opc) {
+       case TILEGX_OPC_ST_ADD:
+               inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
+               inst_buf[inst_buf_index].output_registers = 1L << op0;
+               break;
+       case TILEGX_OPC_LD_ADD:
+               inst_buf[inst_buf_index].input_registers = 1L << op1;
+               inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1);
+               break;
+       case TILEGX_OPC_ADD:
+       case TILEGX_OPC_AND:
+       case TILEGX_OPC_SUB:
+       case TILEGX_OPC_OR:
+       case TILEGX_OPC_XOR:
+       case TILEGX_OPC_NOR:
+       case TILEGX_OPC_SHL:
+       case TILEGX_OPC_SHRU:
+       case TILEGX_OPC_SHRS:
+       case TILEGX_OPC_CMPLTU:
+       case TILEGX_OPC_CMPLTS:
+       case TILEGX_OPC_CMOVEQZ:
+       case TILEGX_OPC_CMOVNEZ:
+               inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2);
+               inst_buf[inst_buf_index].output_registers = 1L << op0;
+               break;
+       case TILEGX_OPC_ADDLI:
+       case TILEGX_OPC_XORI:
+       case TILEGX_OPC_ORI:
+       case TILEGX_OPC_SHLI:
+       case TILEGX_OPC_SHRUI:
+       case TILEGX_OPC_SHRSI:
+       case TILEGX_OPC_SHL16INSLI:
+       case TILEGX_OPC_CMPLTUI:
+       case TILEGX_OPC_CMPLTSI:
+               inst_buf[inst_buf_index].input_registers = 1L << op1;
+               inst_buf[inst_buf_index].output_registers = 1L << op0;
+               break;
+       default:
+               printf("unrecoginzed opc: %s\n", opcode->name);
+               SLJIT_ASSERT_STOP();
+       }
+
+       inst_buf_index++;
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
+{
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       inst_buf[inst_buf_index].opcode = opcode;
+       inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
+       inst_buf[inst_buf_index].operand_value[0] = op0;
+       inst_buf[inst_buf_index].operand_value[1] = op1;
+       inst_buf[inst_buf_index].line = line;
+
+       switch (opc) {
+       case TILEGX_OPC_BEQZ:
+       case TILEGX_OPC_BNEZ:
+               inst_buf[inst_buf_index].input_registers = 1L << op0;
+               break;
+       case TILEGX_OPC_ST:
+       case TILEGX_OPC_ST1:
+       case TILEGX_OPC_ST2:
+       case TILEGX_OPC_ST4:
+               inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
+               inst_buf[inst_buf_index].output_registers = 0;
+               break;
+       case TILEGX_OPC_CLZ:
+       case TILEGX_OPC_LD:
+       case TILEGX_OPC_LD1U:
+       case TILEGX_OPC_LD1S:
+       case TILEGX_OPC_LD2U:
+       case TILEGX_OPC_LD2S:
+       case TILEGX_OPC_LD4U:
+       case TILEGX_OPC_LD4S:
+               inst_buf[inst_buf_index].input_registers = 1L << op1;
+               inst_buf[inst_buf_index].output_registers = 1L << op0;
+               break;
+       default:
+               printf("unrecoginzed opc: %s\n", opcode->name);
+               SLJIT_ASSERT_STOP();
+       }
+
+       inst_buf_index++;
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
+{
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       inst_buf[inst_buf_index].opcode = opcode;
+       inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
+       inst_buf[inst_buf_index].input_registers = 0;
+       inst_buf[inst_buf_index].output_registers = 0;
+       inst_buf[inst_buf_index].line = line;
+       inst_buf_index++;
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
+{
+       if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
+               FAIL_IF(update_buffer(compiler));
+
+       const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
+       inst_buf[inst_buf_index].opcode = opcode;
+       inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
+       inst_buf[inst_buf_index].operand_value[0] = op0;
+       inst_buf[inst_buf_index].input_registers = 1L << op0;
+       inst_buf[inst_buf_index].output_registers = 0;
+       inst_buf[inst_buf_index].line = line;
+       inst_buf_index++;
+       return flush_buffer(compiler);
+}
+
+static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+       sljit_sw diff;
+       sljit_uw target_addr;
+       sljit_ins *inst;
+       sljit_ins saved_inst;
+
+       if (jump->flags & SLJIT_REWRITABLE_JUMP)
+               return code_ptr;
+
+       if (jump->flags & JUMP_ADDR)
+               target_addr = jump->u.target;
+       else {
+               SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+               target_addr = (sljit_uw)(code + jump->u.label->size);
+       }
+
+       inst = (sljit_ins *)jump->addr;
+       if (jump->flags & IS_COND)
+               inst--;
+
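+       /* Branch offsets are measured in 8-byte bundles, hence the
+          shift by 3. */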
+       diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
+       if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
+               jump->flags |= PATCH_B;
+
+               if (!(jump->flags & IS_COND)) {
+                       if (jump->flags & IS_JAL) {
+                               jump->flags &= ~(PATCH_B);
+                               jump->flags |= PATCH_J;
+                               inst[0] = JAL_X1;
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(inst);
+#endif
+                       } else {
+                               inst[0] = BEQZ_X1 | SRCA_X1(ZERO);
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(inst);
+#endif
+                       }
+
+                       return inst;
+               }
+
+               inst[0] = inst[0] ^ (0x7L << 55);
+
+#ifdef TILEGX_JIT_DEBUG
+               printf("[runtime relocate]%04d:\t", __LINE__);
+               print_insn_tilegx(inst);
+#endif
+               jump->addr -= sizeof(sljit_ins);
+               return inst;
+       }
+
+       if (jump->flags & IS_COND) {
+               if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
+                       jump->flags |= PATCH_J;
+                       inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
+                       inst[1] = J_X1;
+                       return inst + 1;
+               }
+
+               return code_ptr;
+       }
+
+       if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
+               jump->flags |= PATCH_J;
+
+               if (jump->flags & IS_JAL) {
+                       inst[0] = JAL_X1;
+
+#ifdef TILEGX_JIT_DEBUG
+                       printf("[runtime relocate]%04d:\t", __LINE__);
+                       print_insn_tilegx(inst);
+#endif
+
+               } else {
+                       inst[0] = J_X1;
+
+#ifdef TILEGX_JIT_DEBUG
+                       printf("[runtime relocate]%04d:\t", __LINE__);
+                       print_insn_tilegx(inst);
+#endif
+               }
+
+               return inst;
+       }
+
+       return code_ptr;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ins *code;
+       sljit_ins *code_ptr;
+       sljit_ins *buf_ptr;
+       sljit_ins *buf_end;
+       sljit_uw word_count;
+       sljit_uw addr;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       check_sljit_generate_code(compiler);
+       reverse_buf(compiler);
+
+       code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       word_count = 0;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = (sljit_ins *)buf->memory;
+               buf_end = buf_ptr + (buf->used_size >> 3);
+               do {
+                       *code_ptr = *buf_ptr++;
+                       SLJIT_ASSERT(!label || label->size >= word_count);
+                       SLJIT_ASSERT(!jump || jump->addr >= word_count);
+                       SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+                       /* These structures are ordered by their address. */
+                       if (label && label->size == word_count) {
+                               /* Just recording the address. */
+                               label->addr = (sljit_uw) code_ptr;
+                               label->size = code_ptr - code;
+                               label = label->next;
+                       }
+
+                       if (jump && jump->addr == word_count) {
+                               if (jump->flags & IS_JAL)
+                                       jump->addr = (sljit_uw)(code_ptr - 4);
+                               else
+                                       jump->addr = (sljit_uw)(code_ptr - 3);
+
+                               code_ptr = detect_jump_type(jump, code_ptr, code);
+                               jump = jump->next;
+                       }
+
+                       if (const_ && const_->addr == word_count) {
+                               /* Just recording the address. */
+                               const_->addr = (sljit_uw) code_ptr;
+                               const_ = const_->next;
+                       }
+
+                       code_ptr++;
+                       word_count++;
+               } while (buf_ptr < buf_end);
+
+               buf = buf->next;
+       } while (buf);
+
+       if (label && label->size == word_count) {
+               label->addr = (sljit_uw) code_ptr;
+               label->size = code_ptr - code;
+               label = label->next;
+       }
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+       SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
+
+       jump = compiler->jumps;
+       while (jump) {
+               do {
+                       addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+                       buf_ptr = (sljit_ins *)jump->addr;
+
+                       if (jump->flags & PATCH_B) {
+                               addr = (sljit_sw)(addr - (jump->addr)) >> 3;
+                               SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
+                               buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(buf_ptr);
+#endif
+                               break;
+                       }
+
+                       if (jump->flags & PATCH_J) {
+                               SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
+                               addr = (sljit_sw)(addr - (jump->addr)) >> 3;
+                               buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);
+
+#ifdef TILEGX_JIT_DEBUG
+                               printf("[runtime relocate]%04d:\t", __LINE__);
+                               print_insn_tilegx(buf_ptr);
+#endif
+                               break;
+                       }
+
+                       SLJIT_ASSERT(!(jump->flags & IS_JAL));
+
+                       /* Set the fields of immediate loads. */
+                       buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
+                       buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
+                       buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
+               } while (0);
+
+               jump = jump->next;
+       }
+
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+       SLJIT_CACHE_FLUSH(code, code_ptr);
+       return code;
+}
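+
+/* Sketch only (helper name assumed, not part of the port): the loop
+   above re-targets an immediate-load sequence by rewriting the 16-bit
+   immediate field that each bundle keeps in bits 43..58. The masking
+   it performs per bundle is equivalent to: */
+static SLJIT_INLINE sljit_ins patch_imm16_field(sljit_ins bundle, sljit_sw imm16)
+{
+	/* Clear bits 43..58, then insert the low 16 bits of imm16 there. */
+	return (bundle & ~(0xFFFFL << 43)) | ((imm16 & 0xFFFFL) << 43);
+}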
+
+static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm)
+{
+       if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN)
+               return ADDLI(dst_ar, ZERO, imm);
+
+       if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) {
+               FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
+               return SHL16INSLI(dst_ar, dst_ar, imm);
+       }
+
+       if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) {
+               FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
+               FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
+               return SHL16INSLI(dst_ar, dst_ar, imm);
+       }
+
+       FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
+       FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
+       FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
+       return SHL16INSLI(dst_ar, dst_ar, imm);
+}
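+
+/* A minimal C model of the chain above (sketch; the function is assumed,
+   for illustration only, and mirrors ADDLI/SHL16INSLI semantics): ADDLI
+   seeds the register with a sign-extended 16-bit chunk, and each
+   SHL16INSLI shifts left by 16 and inserts the next chunk, so at most
+   four steps rebuild any 64-bit constant. */
+static SLJIT_INLINE sljit_sw model_load_immediate(sljit_sw imm)
+{
+	sljit_sw r = (sljit_sh)(imm >> 48);	/* ADDLI: sign-extended top chunk. */
+	r = (sljit_sw)(((sljit_uw)r << 16) | ((imm >> 32) & 0xFFFF));	/* SHL16INSLI */
+	r = (sljit_sw)(((sljit_uw)r << 16) | ((imm >> 16) & 0xFFFF));	/* SHL16INSLI */
+	r = (sljit_sw)(((sljit_uw)r << 16) | (imm & 0xFFFF));		/* SHL16INSLI */
+	return r;	/* r == imm for every 64-bit input. */
+}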
+
+static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
+{
+       /* Should *not* be optimized as load_immediate, as the pcre
+          relocation mechanism will match this fixed 3-instruction pattern. */
+       if (flush) {
+               FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
+               FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
+               return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
+       }
+
+       FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
+       FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
+       return SHL16INSLI(dst_ar, dst_ar, imm);
+}
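+
+/* The emitted pattern is, schematically (register names assumed):
+
+       ADDLI      r, zero, imm[47:32]
+       SHL16INSLI r, r,    imm[31:16]
+       SHL16INSLI r, r,    imm[15:0]
+
+   This is the shape that the relocation loop in sljit_generate_code
+   rewrites field by field at runtime. */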
+
+static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush)
+{
+       /* Should *not* be optimized as load_immediate, as the pcre
+          relocation mechanism will match this fixed 4-instruction pattern. */
+       if (flush) {
+               FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
+               FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
+               FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
+               return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
+       }
+
+       FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
+       FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
+       FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
+       return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_ins base;
+       sljit_ins bundle = 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       local_size += (saveds + 1) * sizeof(sljit_sw);
+       local_size = (local_size + 7) & ~7;
+       compiler->local_size = local_size;
+
+       if (local_size <= SIMM_16BIT_MAX) {
+               /* Frequent case. */
+               FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
+               base = SLJIT_LOCALS_REG_mapped;
+       } else {
+               FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
+               FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
+               FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
+               base = TMP_REG2_mapped;
+               local_size = 0;
+       }
+
+       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
+       FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));
+
+       if (saveds >= 1)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8));
+
+       if (saveds >= 2)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8));
+
+       if (saveds >= 3)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8));
+
+       if (saveds >= 4)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8));
+
+       if (saveds >= 5)
+               FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8));
+
+       if (args >= 1)
+               FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO));
+
+       if (args >= 2)
+               FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO));
+
+       if (args >= 3)
+               FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO));
+
+       return SLJIT_SUCCESS;
+}
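+
+/* Worked example of the frame arithmetic above (values assumed): with
+   saveds == 3 and local_size == 20, the frame grows to
+   20 + (3 + 1) * 8 == 52 bytes and is rounded up to 56; RA is then
+   stored at [base + 48] and the saved registers below it in -8 steps. */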
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       CHECK_ERROR_VOID();
+       check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       local_size += (saveds + 1) * sizeof(sljit_sw);
+       compiler->local_size = (local_size + 7) & ~7;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si local_size;
+       sljit_ins base;
+       int addr_initialized = 0;
+
+       CHECK_ERROR();
+       check_sljit_emit_return(compiler, op, src, srcw);
+
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       local_size = compiler->local_size;
+       if (local_size <= SIMM_16BIT_MAX)
+               base = SLJIT_LOCALS_REG_mapped;
+       else {
+               FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
+               FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
+               base = TMP_REG1_mapped;
+               local_size = 0;
+       }
+
+       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
+       FAIL_IF(LD(RA, ADDR_TMP_mapped));
+
+       if (compiler->saveds >= 5) {
+               FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48));
+               addr_initialized = 1;
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 4) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40));
+                       addr_initialized = 1;
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 3) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32));
+                       addr_initialized = 1;
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 2) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24));
+                       addr_initialized = 1;
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->saveds >= 1) {
+               if (addr_initialized == 0) {
+                       FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16));
+                       /* addr_initialized = 1; no need to initialize as it's the last one. */
+               }
+
+               FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8));
+       }
+
+       if (compiler->local_size <= SIMM_16BIT_MAX)
+               FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
+       else
+               FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));
+
+       return JR(RA);
+}
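+
+/* Example restore order for saveds == 2 (mirroring the offsets used by
+   sljit_emit_enter above): RA is reloaded from local_size - 8, then the
+   post-incrementing LD_ADD walks up from local_size - 24, restoring
+   SLJIT_SAVED_REG2 and finally SLJIT_SAVED_REG1 at local_size - 16. */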
+
+/* reg_ar is an absolute (machine) register number, not an sljit register index. */
+
+/* Fast path: emits the access with at most an address add plus one transfer. */
+static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+
+       if ((!(flags & WRITE_BACK) || !(arg & REG_MASK))
+                       && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
+               /* Works for both absolute and relative addresses. */
+               if (SLJIT_UNLIKELY(flags & ARG_TEST))
+                       return 1;
+
+               FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));
+
+               if (flags & LOAD_DATA)
+                       FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
+               else
+                       FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
+
+               return -1;
+       }
+
+       return 0;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back. */
+static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+       /* Simple operation except for updates. */
+       if (arg & OFFS_REG_MASK) {
+               argw &= 0x3;
+               next_argw &= 0x3;
+               if (argw && argw == next_argw
+                               && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
+                       return 1;
+               return 0;
+       }
+
+       if (arg == next_arg) {
+               if (((next_argw - argw) <= SIMM_16BIT_MAX
+                               && (next_argw - argw) >= SIMM_16BIT_MIN))
+                       return 1;
+
+               return 0;
+       }
+
+       return 0;
+}
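+
+/* Example (operands assumed): two accesses to [r1 + 0x100] and
+   [r1 + 0x108] can share the cached base, since the arguments match and
+   the delta fits in 16 bits; [r1 + (r2 << 2)] followed by
+   [r3 + (r2 << 2)] can share the cached scaled index instead. */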
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
+{
+       sljit_si tmp_ar, base;
+
+       SLJIT_ASSERT(arg & SLJIT_MEM);
+       if (!(next_arg & SLJIT_MEM)) {
+               next_arg = 0;
+               next_argw = 0;
+       }
+
+       if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
+               tmp_ar = reg_ar;
+       else
+               tmp_ar = TMP_REG1_mapped;
+
+       base = arg & REG_MASK;
+
+       if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+               argw &= 0x3;
+
+               if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
+                       FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
+                       reg_ar = TMP_REG1_mapped;
+               }
+
+               /* Using the cache. */
+               if (argw == compiler->cache_argw) {
+                       if (!(flags & WRITE_BACK)) {
+                               if (arg == compiler->cache_arg) {
+                                       if (flags & LOAD_DATA)
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+                                       else
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+                               }
+
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                                               compiler->cache_arg = arg;
+                                               compiler->cache_argw = argw;
+                                               FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
+                                               if (flags & LOAD_DATA)
+                                                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+                                               else
+                                                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+                                       }
+
+                                       FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
+                                       if (flags & LOAD_DATA)
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
+                                       else
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
+                               }
+                       } else {
+                               if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                                       FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
+                                       if (flags & LOAD_DATA)
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
+                                       else
+                                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
+                               }
+                       }
+               }
+
+               if (SLJIT_UNLIKELY(argw)) {
+                       compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+                       compiler->cache_argw = argw;
+                       FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
+               }
+
+               if (!(flags & WRITE_BACK)) {
+                       if (arg == next_arg && argw == (next_argw & 0x3)) {
+                               compiler->cache_arg = arg;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
+                               tmp_ar = TMP_REG3_mapped;
+                       } else
+                               FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
+
+                       if (flags & LOAD_DATA)
+                               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
+                       else
+                               return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
+               }
+
+               FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
+
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
+       }
+
+       if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
+               /* Update only applies if a base register exists. */
+               if (reg_ar == reg_map[base]) {
+                       SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);
+                       if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
+                               FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
+                               if (flags & LOAD_DATA)
+                                       FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
+                               else
+                                       FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
+
+                               if (argw)
+                                       return ADDLI(reg_map[base], reg_map[base], argw);
+
+                               return SLJIT_SUCCESS;
+                       }
+
+                       FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
+                       reg_ar = TMP_REG1_mapped;
+               }
+
+               if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
+                       if (argw)
+                               FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
+               } else {
+                       if (compiler->cache_arg == SLJIT_MEM
+                                       && argw - compiler->cache_argw <= SIMM_16BIT_MAX
+                                       && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
+                               if (argw != compiler->cache_argw) {
+                                       FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
+                                       compiler->cache_argw = argw;
+                               }
+
+                               FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
+                       } else {
+                               compiler->cache_arg = SLJIT_MEM;
+                               compiler->cache_argw = argw;
+                               FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
+                               FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
+                       }
+               }
+
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
+       }
+
+       if (compiler->cache_arg == arg
+                       && argw - compiler->cache_argw <= SIMM_16BIT_MAX
+                       && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
+               if (argw != compiler->cache_argw) {
+                       FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
+                       compiler->cache_argw = argw;
+               }
+
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+       }
+
+       if (compiler->cache_arg == SLJIT_MEM
+                       && argw - compiler->cache_argw <= SIMM_16BIT_MAX
+                       && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
+               if (argw != compiler->cache_argw)
+                       FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
+       } else {
+               compiler->cache_arg = SLJIT_MEM;
+               FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
+       }
+
+       compiler->cache_argw = argw;
+
+       if (!base) {
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+       }
+
+       if (arg == next_arg
+                       && next_argw - argw <= SIMM_16BIT_MAX
+                       && next_argw - argw >= SIMM_16BIT_MIN) {
+               compiler->cache_arg = arg;
+               FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
+               if (flags & LOAD_DATA)
+                       return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
+               else
+                       return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
+       }
+
+       FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));
+
+       if (flags & LOAD_DATA)
+               return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
+       else
+               return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw)
+{
+       if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
+               return compiler->error;
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
+{
+       if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+               return compiler->error;
+       return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_enter(compiler, dst, dstw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       if (FAST_IS_REG(dst))
+               return ADD(reg_map[dst], RA, ZERO);
+
+       /* Memory. */
+       return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_fast_return(compiler, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src))
+               FAIL_IF(ADD(RA, reg_map[src], ZERO));
+
+       else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));
+
+       else if (src & SLJIT_IMM)
+               FAIL_IF(load_immediate(compiler, RA, srcw));
+
+       return JR(RA);
+}
+
+static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2)
+{
+       sljit_si overflow_ra = 0;
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (dst != src2)
+                       return ADD(reg_map[dst], reg_map[src2], ZERO);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UI:
+       case SLJIT_MOV_SI:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SI)
+                               return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);
+
+                       return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
+               } else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UB:
+       case SLJIT_MOV_SB:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SB)
+                               return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);
+
+                       return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
+               } else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_MOV_UH:
+       case SLJIT_MOV_SH:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+                       if (op == SLJIT_MOV_SH)
+                               return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);
+
+                       return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
+               } else if (dst != src2)
+                       SLJIT_ASSERT_STOP();
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_NOT:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_CLZ:
+               SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+               if (op & SLJIT_SET_E)
+                       FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
+               if (CHECK_FLAGS(SLJIT_SET_E))
+                       FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADD:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
+                               if (src2 < 0)
+                                       FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));
+
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(ORI(ULESS_FLAG, reg_map[src1], src2));
+                               else {
+                                       FAIL_IF(ADDLI(ULESS_FLAG, ZERO, src2));
+                                       FAIL_IF(OR(ULESS_FLAG, reg_map[src1], ULESS_FLAG));
+                               }
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
+
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));
+
+                               if (src2 < 0)
+                                       FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
+                       }
+               } else {
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
+                               FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
+
+                               if (src1 != dst)
+                                       overflow_ra = reg_map[src1];
+                               else if (src2 != dst)
+                                       overflow_ra = reg_map[src2];
+                               else {
+                                       /* Rare occasion. */
+                                       FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
+                                       overflow_ra = TMP_EREG2;
+                               }
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(ADD(EQUAL_FLAG, reg_map[src1], reg_map[src2]));
+
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(OR(ULESS_FLAG, reg_map[src1], reg_map[src2]));
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
+
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
+                               FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
+                       }
+               }
+
+               /* a + b == (a | b) + (a & b), so a sum that does not wrap
+                  satisfies a + b >= a | b; after a wrapping add the result
+                  is strictly below a | b, so the carry must be set to 1. */
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[dst], ULESS_FLAG));
+
+               if (op & SLJIT_SET_O)
+                       return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ADDC:
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               if (src2 >= 0)
+                                       FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2));
+                               else {
+                                       FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2));
+                                       FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1));
+                               }
+                       }
+
+                       FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
+
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2]));
+
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1));
+
+               FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG));
+
+               if (!(op & SLJIT_SET_C))
+                       return SLJIT_SUCCESS;
+
+               /* Set TMP_EREG2 to ((dst == 0) && (ULESS_FLAG == 1)): adding
+                  the incoming carry wrapped the result to zero, which itself
+                  carries out. */
+               FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1));
+               FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG));
+               /* Set carry flag. */
+               return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);
+
+       case SLJIT_SUB:
+               if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
+                       FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
+
+                               if (src2 < 0)
+                                       FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
+
+                               if (src1 != dst)
+                                       overflow_ra = reg_map[src1];
+                               else {
+                                       /* Rare occasion. */
+                                       FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
+                                       overflow_ra = TMP_EREG2;
+                               }
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));
+
+                       if (op & SLJIT_SET_C) {
+                               FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
+                               FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E))
+                               FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
+
+               } else {
+
+                       if (op & SLJIT_SET_O) {
+                               FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
+                               FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
+
+                               if (src1 != dst)
+                                       overflow_ra = reg_map[src1];
+                               else {
+                                       /* Rare occasion. */
+                                       FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
+                                       overflow_ra = TMP_EREG2;
+                               }
+                       }
+
+                       if (op & SLJIT_SET_E)
+                               FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));
+
+                       if (op & (SLJIT_SET_U | SLJIT_SET_C))
+                               FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));
+
+                       if (op & SLJIT_SET_U)
+                               FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));
+
+                       if (op & SLJIT_SET_S) {
+                               FAIL_IF(CMPLTS(LESS_FLAG, reg_map[src1], reg_map[src2]));
+                               FAIL_IF(CMPLTS(GREATER_FLAG, reg_map[src2], reg_map[src1]));
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+                               FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
+               }
+
+               if (op & SLJIT_SET_O) {
+                       FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
+                       FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
+                       return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
+               }
+
+               return SLJIT_SUCCESS;
+
+       case SLJIT_SUBC:
+               if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
+                       FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
+                       src2 = TMP_REG2;
+                       flags &= ~SRC2_IMM;
+               }
+
+               if (flags & SRC2_IMM) {
+                       if (op & SLJIT_SET_C) {
+                               FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
+                               FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
+                       }
+
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
+
+               } else {
+                       if (op & SLJIT_SET_C)
+                               FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));
+
+                       /* dst may be the same as src1 or src2. */
+                       FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
+               }
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));
+
+               FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));
+
+               if (op & SLJIT_SET_C)
+                       FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));
+
+               return SLJIT_SUCCESS;
+
+#define EMIT_LOGICAL(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
+                               ADDR_TMP_mapped, __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, reg_map[dst], reg_map[src1], \
+                               ADDR_TMP_mapped, __LINE__)); \
+       } else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
+                               reg_map[src2], __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, reg_map[dst], reg_map[src1], \
+                               reg_map[src2], __LINE__)); \
+       }
+
+       case SLJIT_AND:
+               EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_OR:
+               EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_XOR:
+               EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
+               return SLJIT_SUCCESS;
+
+#define EMIT_SHIFT(op_imm, op_norm) \
+       if (flags & SRC2_IMM) { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
+                               src2 & 0x3F, __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_imm, reg_map[dst], reg_map[src1], \
+                               src2 & 0x3F, __LINE__)); \
+       } else { \
+               if (op & SLJIT_SET_E) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
+                               reg_map[src2], __LINE__)); \
+               if (CHECK_FLAGS(SLJIT_SET_E)) \
+                       FAIL_IF(push_3_buffer( \
+                               compiler, op_norm, reg_map[dst], reg_map[src1], \
+                               reg_map[src2], __LINE__)); \
+       }
+
+       case SLJIT_SHL:
+               EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_LSHR:
+               EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
+               return SLJIT_SUCCESS;
+
+       case SLJIT_ASHR:
+               EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
+               return SLJIT_SUCCESS;
+       }
+
+       SLJIT_ASSERT_STOP();
+       return SLJIT_SUCCESS;
+}
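+
+/* Flag convention used throughout emit_single_op (sketch): the dedicated
+   registers EQUAL_FLAG, ULESS_FLAG, UGREATER_FLAG, LESS_FLAG,
+   GREATER_FLAG and OVERFLOW_FLAG hold the comparison results, so a later
+   sljit_emit_op_flags or conditional jump only has to test the matching
+   register against zero. */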
+
+static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
+{
+       /* arg1 goes to TMP_REG1 or src reg.
+          arg2 goes to TMP_REG2, imm or src reg.
+          TMP_REG3 can be used for caching.
+          result goes to TMP_REG2, so the result store can use TMP_REG1 and TMP_REG3. */
+       sljit_si dst_r = TMP_REG2;
+       sljit_si src1_r;
+       sljit_sw src2_r = 0;
+       sljit_si sugg_src2_r = TMP_REG2;
+
+       if (!(flags & ALT_KEEP_CACHE)) {
+               compiler->cache_arg = 0;
+               compiler->cache_argw = 0;
+       }
+
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+                       return SLJIT_SUCCESS;
+               if (GET_FLAGS(op))
+                       flags |= UNUSED_DEST;
+       } else if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               flags |= REG_DEST;
+               if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       sugg_src2_r = dst_r;
+       } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw))
+               flags |= SLOW_DEST;
+
+       if (flags & IMM_OP) {
+               if ((src2 & SLJIT_IMM) && src2w) {
+                       if ((!(flags & LOGICAL_OP)
+                                       && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
+                                       || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) {
+                               flags |= SRC2_IMM;
+                               src2_r = src2w;
+                       }
+               }
+
+               if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
+                       if ((!(flags & LOGICAL_OP)
+                                       && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
+                                       || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
+                               flags |= SRC2_IMM;
+                               src2_r = src1w;
+
+                               /* And swap arguments. */
+                               src1 = src2;
+                               src1w = src2w;
+                               src2 = SLJIT_IMM;
+                               /* src2w = src2_r unneeded. */
+                       }
+               }
+       }
+
+       /* Source 1. */
+       if (FAST_IS_REG(src1)) {
+               src1_r = src1;
+               flags |= REG1_SOURCE;
+       } else if (src1 & SLJIT_IMM) {
+               if (src1w) {
+                       FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
+                       src1_r = TMP_REG1;
+               } else
+                       src1_r = 0;
+       } else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC1;
+               src1_r = TMP_REG1;
+       }
+
+       /* Source 2. */
+       if (FAST_IS_REG(src2)) {
+               src2_r = src2;
+               flags |= REG2_SOURCE;
+               if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
+                       dst_r = src2_r;
+       } else if (src2 & SLJIT_IMM) {
+               if (!(flags & SRC2_IMM)) {
+                       if (src2w) {
+                               FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
+                               src2_r = sugg_src2_r;
+                       } else {
+                               src2_r = 0;
+                               if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM))
+                                       dst_r = 0;
+                       }
+               }
+       } else {
+               if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
+                       FAIL_IF(compiler->error);
+               else
+                       flags |= SLOW_SRC2;
+               src2_r = sugg_src2_r;
+       }
+
+       if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+               SLJIT_ASSERT(src2_r == TMP_REG2);
+               if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
+               } else {
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
+                       FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
+               }
+       } else if (flags & SLOW_SRC1)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
+       else if (flags & SLOW_SRC2)
+               FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));
+
+       FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+       if (dst & SLJIT_MEM) {
+               if (!(flags & SLOW_DEST)) {
+                       getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
+                       return compiler->error;
+               }
+
+               return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
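+
+/* A sketch of the pipeline above for "add [dst], [src1], imm" (operand
+   names assumed): the immediate is folded into SRC2_IMM when it fits in
+   16 bits, src1 is fetched through getput_arg*/
/* into TMP_REG1,
+   emit_single_op computes into TMP_REG2, and the final store can reuse
+   TMP_REG1 and TMP_REG3 as scratch for the destination address. */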
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type)
+{
+       sljit_si sugg_dst_ar, dst_ar;
+       sljit_si flags = GET_ALL_FLAGS(op);
+
+       CHECK_ERROR();
+       check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       op = GET_OPCODE(op);
+       sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];
+
+       compiler->cache_arg = 0;
+       compiler->cache_argw = 0;
+       if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
+               ADJUST_LOCAL_OFFSET(src, srcw);
+               FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_NOT_EQUAL:
+               FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
+               dst_ar = sugg_dst_ar;
+               break;
+       case SLJIT_C_LESS:
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_LESS:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               dst_ar = ULESS_FLAG;
+               break;
+       case SLJIT_C_GREATER:
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_GREATER:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               dst_ar = UGREATER_FLAG;
+               break;
+       case SLJIT_C_SIG_LESS:
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               dst_ar = LESS_FLAG;
+               break;
+       case SLJIT_C_SIG_GREATER:
+       case SLJIT_C_SIG_LESS_EQUAL:
+               dst_ar = GREATER_FLAG;
+               break;
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_NOT_OVERFLOW:
+               dst_ar = OVERFLOW_FLAG;
+               break;
+       case SLJIT_C_MUL_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
+               dst_ar = sugg_dst_ar;
+               type ^= 0x1; /* Flip type bit for the XORI below. */
+               break;
+       case SLJIT_C_FLOAT_EQUAL:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               dst_ar = EQUAL_FLAG;
+               break;
+
+       default:
+               SLJIT_ASSERT_STOP();
+               dst_ar = sugg_dst_ar;
+               break;
+       }
+
+       if (type & 0x1) {
+               FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1));
+               dst_ar = sugg_dst_ar;
+       }
+
+       if (op >= SLJIT_ADD) {
+               if (TMP_REG2_mapped != dst_ar)
+                       FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
+               return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+       }
+
+       if (dst & SLJIT_MEM)
+               return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw);
+
+       if (sugg_dst_ar != dst_ar)
+               return ADD(sugg_dst_ar, dst_ar, ZERO);
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op0(compiler, op);
+
+       op = GET_OPCODE(op);
+       switch (op) {
+       case SLJIT_NOP:
+               return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);
+
+       case SLJIT_BREAKPOINT:
+               return PI(BPT);
+
+       case SLJIT_UMUL:
+       case SLJIT_SMUL:
+       case SLJIT_UDIV:
+       case SLJIT_SDIV:
+               SLJIT_ASSERT_STOP();
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_MOV:
+       case SLJIT_MOV_P:
+               return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UI:
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_SI:
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOV_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);
+
+       case SLJIT_MOV_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);
+
+       case SLJIT_MOV_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);
+
+       case SLJIT_MOV_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);
+
+       case SLJIT_MOVU:
+       case SLJIT_MOVU_P:
+               return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UI:
+               return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_SI:
+               return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_MOVU_UB:
+               return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw);
+
+       case SLJIT_MOVU_SB:
+               return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw);
+
+       case SLJIT_MOVU_UH:
+               return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw);
+
+       case SLJIT_MOVU_SH:
+               return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw);
+
+       case SLJIT_NOT:
+               return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
+
+       case SLJIT_NEG:
+               return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+
+       case SLJIT_CLZ:
+               return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
+{
+       CHECK_ERROR();
+       check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+       case SLJIT_ADDC:
+               return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SUB:
+       case SLJIT_SUBC:
+               return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_MUL:
+               return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_AND:
+       case SLJIT_OR:
+       case SLJIT_XOR:
+               return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+       case SLJIT_SHL:
+       case SLJIT_LSHR:
+       case SLJIT_ASHR:
+               if (src2 & SLJIT_IMM)
+                       src2w &= 0x3f;
+               if (op & SLJIT_INT_OP)
+                       src2w &= 0x1f;
+
+               return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
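+
+/* Example of the shift masking above (operands assumed): an immediate
+   count of 65 on a 64-bit word emits a shift by 65 & 0x3f == 1, and
+   under SLJIT_INT_OP the count is reduced modulo 32 instead. */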
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
+{
+       struct sljit_label *label;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_label(compiler);
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+       return label;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       sljit_si src_r = TMP_REG2;
+       struct sljit_jump *jump = NULL;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR();
+       check_sljit_emit_ijump(compiler, type, src, srcw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if (FAST_IS_REG(src)) {
+               if (reg_map[src] != 0)
+                       src_r = src;
+               else
+                       FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
+       }
+
+       if (type >= SLJIT_CALL0) {
+               SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
+               if (src & (SLJIT_IMM | SLJIT_MEM)) {
+                       if (src & SLJIT_IMM)
+                               FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
+                       else {
+                               SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
+                               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+                       }
+
+                       FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
+
+                       FAIL_IF(ADDI_SOLO(54, 54, -16));
+
+                       FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));
+
+                       return ADDI_SOLO(54, 54, 16);
+               }
+
+               /* Register input. */
+               if (type >= SLJIT_CALL1)
+                       FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
+
+               FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));
+
+               FAIL_IF(ADDI_SOLO(54, 54, -16));
+
+               FAIL_IF(JALR_SOLO(reg_map[src_r]));
+
+               return ADDI_SOLO(54, 54, 16);
+       }
+
+       if (src & SLJIT_IMM) {
+               jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF(!jump);
+               set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
+               jump->u.target = srcw;
+               FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
+
+               if (type >= SLJIT_FAST_CALL)
+                       FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
+
+               jump->addr = compiler->size;
+               FAIL_IF(JR_SOLO(reg_map[src_r]));
+
+               return SLJIT_SUCCESS;
+
+       } else if (src & SLJIT_MEM)
+               FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+
+       FAIL_IF(JR_SOLO(reg_map[src_r]));
+
+       if (jump)
+               jump->addr = compiler->size;
+
+       return SLJIT_SUCCESS;
+}
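+
+/* Call sketch for type >= SLJIT_CALL0 above: the target ends up in
+   PIC_ADDR_REG, machine register 0 receives SLJIT_R0, and the stack
+   register (54) is dropped by 16 bytes around the JALR and restored
+   after the call returns. */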
+
+#define BR_Z(src) \
+       inst = BEQZ_X1 | SRCA_X1(src); \
+       flags = IS_COND;
+
+#define BR_NZ(src) \
+       inst = BNEZ_X1 | SRCA_X1(src); \
+       flags = IS_COND;
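+
+/*
+   TILE-Gx has no condition-code register: every comparison leaves its result
+   in a dedicated flag register (EQUAL_FLAG, ULESS_FLAG, ...), and BR_Z/BR_NZ
+   test that register with beqz/bnez.  sljit_emit_jump below emits the
+   *inverted* test, so the short branch falls past the far-jump sequence (the
+   three-bundle address load that sljit_set_jump_addr rewrites, plus the
+   jr/jalr) whenever the sljit condition does not hold; BOFF_X1(5) and
+   BOFF_X1(6) encode that skip distance in bundles.
+*/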
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       struct sljit_jump *jump;
+       sljit_ins inst;
+       sljit_si flags = 0;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_jump(compiler, type);
+
+       jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF(!jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       switch (type) {
+       case SLJIT_C_EQUAL:
+       case SLJIT_C_FLOAT_NOT_EQUAL:
+               BR_NZ(EQUAL_FLAG);
+               break;
+       case SLJIT_C_NOT_EQUAL:
+       case SLJIT_C_FLOAT_EQUAL:
+               BR_Z(EQUAL_FLAG);
+               break;
+       case SLJIT_C_LESS:
+       case SLJIT_C_FLOAT_LESS:
+               BR_Z(ULESS_FLAG);
+               break;
+       case SLJIT_C_GREATER_EQUAL:
+       case SLJIT_C_FLOAT_GREATER_EQUAL:
+               BR_NZ(ULESS_FLAG);
+               break;
+       case SLJIT_C_GREATER:
+       case SLJIT_C_FLOAT_GREATER:
+               BR_Z(UGREATER_FLAG);
+               break;
+       case SLJIT_C_LESS_EQUAL:
+       case SLJIT_C_FLOAT_LESS_EQUAL:
+               BR_NZ(UGREATER_FLAG);
+               break;
+       case SLJIT_C_SIG_LESS:
+               BR_Z(LESS_FLAG);
+               break;
+       case SLJIT_C_SIG_GREATER_EQUAL:
+               BR_NZ(LESS_FLAG);
+               break;
+       case SLJIT_C_SIG_GREATER:
+               BR_Z(GREATER_FLAG);
+               break;
+       case SLJIT_C_SIG_LESS_EQUAL:
+               BR_NZ(GREATER_FLAG);
+               break;
+       case SLJIT_C_OVERFLOW:
+       case SLJIT_C_MUL_OVERFLOW:
+               BR_Z(OVERFLOW_FLAG);
+               break;
+       case SLJIT_C_NOT_OVERFLOW:
+       case SLJIT_C_MUL_NOT_OVERFLOW:
+               BR_NZ(OVERFLOW_FLAG);
+               break;
+       default:
+               /* Not a conditional branch. */
+               inst = 0;
+               break;
+       }
+
+       jump->flags |= flags;
+
+       if (inst) {
+               inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
+               PTR_FAIL_IF(PI(inst));
+       }
+
+       PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
+       if (type <= SLJIT_JUMP) {
+               jump->addr = compiler->size;
+               PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
+       } else {
+               SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
+               /* Cannot be optimized out if type is >= CALL0. */
+               jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
+               PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
+               jump->addr = compiler->size;
+               PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
+       }
+
+       return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+       return 0;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
+{
+       SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
+{
+       SLJIT_ASSERT_STOP();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       struct sljit_const *const_;
+       sljit_si reg;
+
+       flush_buffer(compiler);
+
+       CHECK_ERROR_PTR();
+       check_sljit_emit_const(compiler, dst, dstw, init_value);
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+       reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
+
+       PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));
+
+       if (dst & SLJIT_MEM)
+               PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+       return const_;
+}
+
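+/*
+   The two patching routines below rewrite the 16-bit immediate field that
+   the X1 encoding keeps in bits 43..58 of each bundle (presumably the
+   moveli/shl16insli chain produced by emit_const/emit_const_64).  A worked
+   example for sljit_set_const: new_constant == 0x0123456789abcdef patches
+   0x0123, 0x4567, 0x89ab and 0xcdef into inst[0]..inst[3].
+   sljit_set_jump_addr only handles 48-bit targets, hence three bundles.
+*/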
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+       sljit_ins *inst = (sljit_ins *)addr;
+
+       inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43);
+       inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43);
+       inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43);
+       SLJIT_CACHE_FLUSH(inst, inst + 3);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       sljit_ins *inst = (sljit_ins *)addr;
+
+       inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43);
+       inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43);
+       inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43);
+       inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
+       SLJIT_CACHE_FLUSH(inst, inst + 4);
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeX86_32.c b/ext/pcre/pcrelib/sljit/sljitNativeX86_32.c
new file mode 100644 (file)
index 0000000..d7129c8
--- /dev/null
@@ -0,0 +1,550 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* x86 32-bit arch dependent functions. */
+
+static sljit_si emit_do_imm(struct sljit_compiler *compiler, sljit_ub opcode, sljit_sw imm)
+{
+       sljit_ub *inst;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
+       FAIL_IF(!inst);
+       INC_SIZE(1 + sizeof(sljit_sw));
+       *inst++ = opcode;
+       *(sljit_sw*)inst = imm;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type)
+{
+       if (type == SLJIT_JUMP) {
+               *code_ptr++ = JMP_i32;
+               jump->addr++;
+       }
+       else if (type >= SLJIT_FAST_CALL) {
+               *code_ptr++ = CALL_i32;
+               jump->addr++;
+       }
+       else {
+               *code_ptr++ = GROUP_0F;
+               *code_ptr++ = get_jump_code(type);
+               jump->addr += 2;
+       }
+
+       if (jump->flags & JUMP_LABEL)
+               jump->flags |= PATCH_MW;
+       else
+               *(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4);
+       code_ptr += 4;
+
+       return code_ptr;
+}
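+
+/*
+   Byte layout produced above: an unconditional far jump is E9 rel32 (hence
+   the jump->addr++ so that addr points at the displacement), a call is
+   E8 rel32, and a conditional jump is the two-byte 0F 8x rel32 form
+   (jump->addr += 2).  The rel32 written for a known target is relative to
+   the end of the instruction, i.e. jump->addr + 4.
+*/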
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si size;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       compiler->args = args;
+       compiler->flags_saved = 0;
+
+       size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3);
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
+#else
+       size += (args > 0 ? (2 + args * 3) : 0);
+#endif
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+       FAIL_IF(!inst);
+
+       INC_SIZE(size);
+       PUSH_REG(reg_map[TMP_REG1]);
+#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       if (args > 0) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
+       }
+#endif
+       if (saveds > 2 || scratches > 7)
+               PUSH_REG(reg_map[SLJIT_S2]);
+       if (saveds > 1 || scratches > 8)
+               PUSH_REG(reg_map[SLJIT_S1]);
+       if (saveds > 0 || scratches > 9)
+               PUSH_REG(reg_map[SLJIT_S0]);
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       if (args > 0) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
+       }
+       if (args > 1) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
+       }
+       if (args > 2) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
+               *inst++ = 0x24;
+               *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
+       }
+#else
+       if (args > 0) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
+               *inst++ = sizeof(sljit_sw) * 2;
+       }
+       if (args > 1) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
+               *inst++ = sizeof(sljit_sw) * 3;
+       }
+       if (args > 2) {
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
+               *inst++ = sizeof(sljit_sw) * 4;
+       }
+#endif
+
+       SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words);
+#if defined(__APPLE__)
+       /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
+       saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+       local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
+#else
+       if (options & SLJIT_DOUBLE_ALIGNMENT) {
+               local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);
+
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 17);
+               FAIL_IF(!inst);
+
+               INC_SIZE(17);
+               inst[0] = MOV_r_rm;
+               inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP];
+               inst[2] = GROUP_F7;
+               inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP];
+               *(sljit_sw*)(inst + 4) = 0x4;
+               inst[8] = JNE_i8;
+               inst[9] = 6;
+               inst[10] = GROUP_BINARY_81;
+               inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP];
+               *(sljit_sw*)(inst + 12) = 0x4;
+               inst[16] = PUSH_r + reg_map[TMP_REG1];
+       }
+       else
+               local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
+#endif
+
+       compiler->local_size = local_size;
+#ifdef _WIN32
+       if (local_size > 1024) {
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+               FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
+#else
+               local_size -= SLJIT_LOCALS_OFFSET;
+               FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
+               FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+                       SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET));
+#endif
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+       }
+#endif
+
+       SLJIT_ASSERT(local_size > 0);
+       return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+               SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
+}
+
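+/*
+   A sketch of the frame built by the cdecl path of sljit_emit_enter above,
+   for args == 2 and saveds == 2 (higher addresses at the top):
+
+       2nd argument            <- read through TMP_REG1, which captured esp
+       1st argument               right after the initial PUSH_REG(TMP_REG1)
+       return address
+       saved TMP_REG1
+       saved SLJIT_S1
+       saved SLJIT_S0
+       local area              <- esp after the final SUB (local_size bytes)
+*/
+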
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       compiler->args = args;
+
+#if defined(__APPLE__)
+       saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+       compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
+#else
+       if (options & SLJIT_DOUBLE_ALIGNMENT)
+               compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);
+       else
+               compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
+#endif
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si size;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+       SLJIT_ASSERT(compiler->args >= 0);
+
+       compiler->flags_saved = 0;
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+       SLJIT_ASSERT(compiler->local_size > 0);
+       FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+               SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
+
+#if !defined(__APPLE__)
+       if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
+               FAIL_IF(!inst);
+
+               INC_SIZE(3);
+               inst[0] = MOV_r_rm;
+               inst[1] = (reg_map[SLJIT_SP] << 3) | 0x4 /* SIB */;
+               inst[2] = (4 << 3) | reg_map[SLJIT_SP];
+       }
+#endif
+
+       size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
+               (compiler->saveds <= 3 ? compiler->saveds : 3);
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       if (compiler->args > 2)
+               size += 2;
+#else
+       if (compiler->args > 0)
+               size += 2;
+#endif
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+       FAIL_IF(!inst);
+
+       INC_SIZE(size);
+
+       if (compiler->saveds > 0 || compiler->scratches > 9)
+               POP_REG(reg_map[SLJIT_S0]);
+       if (compiler->saveds > 1 || compiler->scratches > 8)
+               POP_REG(reg_map[SLJIT_S1]);
+       if (compiler->saveds > 2 || compiler->scratches > 7)
+               POP_REG(reg_map[SLJIT_S2]);
+       POP_REG(reg_map[TMP_REG1]);
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       if (compiler->args > 2)
+               RET_I16(sizeof(sljit_sw));
+       else
+               RET();
+#else
+       RET();
+#endif
+
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* Size contains the flags as well. */
+static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size,
+       /* The register or immediate operand. */
+       sljit_si a, sljit_sw imma,
+       /* The general operand (not immediate). */
+       sljit_si b, sljit_sw immb)
+{
+       sljit_ub *inst;
+       sljit_ub *buf_ptr;
+       sljit_si flags = size & ~0xf;
+       sljit_si inst_size;
+
+       /* Both cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
+       /* Size flags not allowed for typed instructions. */
+       SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
+       /* Both size flags cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
+       /* SSE2 and immediate is not possible. */
+       SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
+       SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
+               && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
+               && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+
+       size &= 0xf;
+       inst_size = size;
+
+       if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
+               inst_size++;
+       if (flags & EX86_PREF_66)
+               inst_size++;
+
+       /* Calculate size of b. */
+       inst_size += 1; /* mod r/m byte. */
+       if (b & SLJIT_MEM) {
+               if ((b & REG_MASK) == SLJIT_UNUSED)
+                       inst_size += sizeof(sljit_sw);
+               else if (immb != 0 && !(b & OFFS_REG_MASK)) {
+                       /* Immediate operand. */
+                       if (immb <= 127 && immb >= -128)
+                               inst_size += sizeof(sljit_sb);
+                       else
+                               inst_size += sizeof(sljit_sw);
+               }
+
+               if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
+                       b |= TO_OFFS_REG(SLJIT_SP);
+
+               if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
+                       inst_size += 1; /* SIB byte. */
+       }
+
+       /* Calculate size of a. */
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BIN_INS) {
+                       if (imma <= 127 && imma >= -128) {
+                               inst_size += 1;
+                               flags |= EX86_BYTE_ARG;
+                       } else
+                               inst_size += 4;
+               }
+               else if (flags & EX86_SHIFT_INS) {
+                       imma &= 0x1f;
+                       if (imma != 1) {
+                               inst_size++;
+                               flags |= EX86_BYTE_ARG;
+                       }
+               } else if (flags & EX86_BYTE_ARG)
+                       inst_size++;
+               else if (flags & EX86_HALF_ARG)
+                       inst_size += sizeof(short);
+               else
+                       inst_size += sizeof(sljit_sw);
+       }
+       else
+               SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
+       PTR_FAIL_IF(!inst);
+
+       /* Encoding the instruction. */
+       INC_SIZE(inst_size);
+       if (flags & EX86_PREF_F2)
+               *inst++ = 0xf2;
+       if (flags & EX86_PREF_F3)
+               *inst++ = 0xf3;
+       if (flags & EX86_PREF_66)
+               *inst++ = 0x66;
+
+       buf_ptr = inst + size;
+
+       /* Encode mod/rm byte. */
+       if (!(flags & EX86_SHIFT_INS)) {
+               if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+                       *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
+
+               if ((a & SLJIT_IMM) || (a == 0))
+                       *buf_ptr = 0;
+               else if (!(flags & EX86_SSE2_OP1))
+                       *buf_ptr = reg_map[a] << 3;
+               else
+                       *buf_ptr = a << 3;
+       }
+       else {
+               if (a & SLJIT_IMM) {
+                       if (imma == 1)
+                               *inst = GROUP_SHIFT_1;
+                       else
+                               *inst = GROUP_SHIFT_N;
+               } else
+                       *inst = GROUP_SHIFT_CL;
+               *buf_ptr = 0;
+       }
+
+       if (!(b & SLJIT_MEM))
+               *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
+       else if ((b & REG_MASK) != SLJIT_UNUSED) {
+               if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
+                       if (immb != 0) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr |= 0x40;
+                               else
+                                       *buf_ptr |= 0x80;
+                       }
+
+                       if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
+                               *buf_ptr++ |= reg_map[b & REG_MASK];
+                       else {
+                               *buf_ptr++ |= 0x04;
+                               *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
+                       }
+
+                       if (immb != 0) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr++ = immb; /* 8 bit displacement. */
+                               else {
+                                       *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+                                       buf_ptr += sizeof(sljit_sw);
+                               }
+                       }
+               }
+               else {
+                       *buf_ptr++ |= 0x04;
+                       *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
+               }
+       }
+       else {
+               *buf_ptr++ |= 0x05;
+               *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+               buf_ptr += sizeof(sljit_sw);
+       }
+
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BYTE_ARG)
+                       *buf_ptr = imma;
+               else if (flags & EX86_HALF_ARG)
+                       *(short*)buf_ptr = imma;
+               else if (!(flags & EX86_SHIFT_INS))
+                       *(sljit_sw*)buf_ptr = imma;
+       }
+
+       return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
+}
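+
+/*
+   The returned pointer addresses the opcode byte, which the caller fills in
+   itself, e.g.:
+
+       inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+       FAIL_IF(!inst);
+       *inst++ = POP_rm;
+
+   For an imm8 binary op (EX86_BIN_INS with a small immediate) the 83 /digit
+   ib form is pre-selected above, so "add eax, 100" becomes 83 C0 64 once the
+   caller ORs the /digit into the mod/rm byte at inst + 1.
+*/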
+
+/* --------------------------------------------------------------------- */
+/*  Call / return instructions                                           */
+/* --------------------------------------------------------------------- */
+
+static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type)
+{
+       sljit_ub *inst;
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+       inst = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
+       FAIL_IF(!inst);
+       INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);
+
+       if (type >= SLJIT_CALL3)
+               PUSH_REG(reg_map[SLJIT_R2]);
+       *inst++ = MOV_r_rm;
+       *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
+#else
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
+       FAIL_IF(!inst);
+       INC_SIZE(4 * (type - SLJIT_CALL0));
+
+       *inst++ = MOV_rm_r;
+       *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
+       *inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
+       *inst++ = 0;
+       if (type >= SLJIT_CALL2) {
+               *inst++ = MOV_rm_r;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
+               *inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
+               *inst++ = sizeof(sljit_sw);
+       }
+       if (type >= SLJIT_CALL3) {
+               *inst++ = MOV_rm_r;
+               *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
+               *inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
+               *inst++ = 2 * sizeof(sljit_sw);
+       }
+#endif
+       return SLJIT_SUCCESS;
+}
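+
+/*
+   Argument staging above: in the fastcall variant SLJIT_R1 already sits in
+   edx (the second argument register), so the code only pushes a third
+   argument (before ecx is clobbered) and moves SLJIT_R0 from eax into ecx;
+   the cdecl variant stores R0..R2 to [esp], [esp + 4] and [esp + 8], where
+   the callee expects its stack arguments.
+*/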
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
+       if (FAST_IS_REG(dst)) {
+               /* Unused dest is possible here. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+
+               INC_SIZE(1);
+               POP_REG(reg_map[dst]);
+               return SLJIT_SUCCESS;
+       }
+
+       /* Memory. */
+       inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+       FAIL_IF(!inst);
+       *inst++ = POP_rm;
+       return SLJIT_SUCCESS;
+}
+
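+/*
+   sljit_emit_fast_enter above and sljit_emit_fast_return below implement
+   sljit's lightweight calling convention: the caller reaches the target with
+   a plain CALL (see generate_far_jump_code), fast_enter immediately pops the
+   pushed return address into dst, and fast_return pushes src back and
+   executes RET, so no stack frame is ever built.
+*/
+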
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+       if (FAST_IS_REG(src)) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
+               FAIL_IF(!inst);
+
+               INC_SIZE(1 + 1);
+               PUSH_REG(reg_map[src]);
+       }
+       else if (src & SLJIT_MEM) {
+               inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_FF;
+               *inst |= PUSH_rm;
+
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+       }
+       else {
+               /* SLJIT_IMM. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
+               FAIL_IF(!inst);
+
+               INC_SIZE(5 + 1);
+               *inst++ = PUSH_i32;
+               *(sljit_sw*)inst = srcw;
+               inst += sizeof(sljit_sw);
+       }
+
+       RET();
+       return SLJIT_SUCCESS;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeX86_64.c b/ext/pcre/pcrelib/sljit/sljitNativeX86_64.c
new file mode 100644 (file)
index 0000000..1790d8a
--- /dev/null
@@ -0,0 +1,747 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* x86 64-bit arch dependent functions. */
+
+static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm)
+{
+       sljit_ub *inst;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
+       FAIL_IF(!inst);
+       INC_SIZE(2 + sizeof(sljit_sw));
+       *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
+       *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
+       *(sljit_sw*)inst = imm;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type)
+{
+       if (type < SLJIT_JUMP) {
+               /* Invert type. */
+               *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
+               *code_ptr++ = 10 + 3;
+       }
+
+       SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
+       *code_ptr++ = REX_W | REX_B;
+       *code_ptr++ = MOV_r_i32 + 1;
+       jump->addr = (sljit_uw)code_ptr;
+
+       if (jump->flags & JUMP_LABEL)
+               jump->flags |= PATCH_MD;
+       else
+               *(sljit_sw*)code_ptr = jump->u.target;
+
+       code_ptr += sizeof(sljit_sw);
+       *code_ptr++ = REX_B;
+       *code_ptr++ = GROUP_FF;
+       *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
+
+       return code_ptr;
+}
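+
+/*
+   The "10 + 3" rel8 above is the size of the skipped sequence: the movabs is
+   REX.W+REX.B, opcode, imm64 (1 + 1 + 8 = 10 bytes) and the indirect
+   jmp/call through r9 is REX.B, FF, mod/rm (3 bytes).  The inverted Jcc is
+   turned into its one-byte short form by subtracting 0x10 from the 0F 8x
+   long-form code (0F 84 "je rel32" becomes 74 "je rel8").
+*/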
+
+static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type)
+{
+       sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si));
+
+       if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
+               *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
+               *(sljit_sw*)code_ptr = delta;
+       }
+       else {
+               SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
+               *code_ptr++ = REX_W | REX_B;
+               *code_ptr++ = MOV_r_i32 + 1;
+               *(sljit_sw*)code_ptr = addr;
+               code_ptr += sizeof(sljit_sw);
+               *code_ptr++ = REX_B;
+               *code_ptr++ = GROUP_FF;
+               *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
+       }
+
+       return code_ptr;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si i, tmp, size, saved_register_size;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       compiler->flags_saved = 0;
+
+       /* Including the return address saved by the call instruction. */
+       saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+
+       tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = SLJIT_S0; i >= tmp; i--) {
+               size = reg_map[i] >= 8 ? 2 : 1;
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+               INC_SIZE(size);
+               if (reg_map[i] >= 8)
+                       *inst++ = REX_B;
+               PUSH_REG(reg_lmap[i]);
+       }
+
+       for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+               size = reg_map[i] >= 8 ? 2 : 1;
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+               INC_SIZE(size);
+               if (reg_map[i] >= 8)
+                       *inst++ = REX_B;
+               PUSH_REG(reg_lmap[i]);
+       }
+
+       if (args > 0) {
+               size = args * 3;
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+
+               INC_SIZE(size);
+
+#ifndef _WIN64
+               if (args > 0) {
+                       *inst++ = REX_W;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
+               }
+               if (args > 1) {
+                       *inst++ = REX_W | REX_R;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
+               }
+               if (args > 2) {
+                       *inst++ = REX_W | REX_R;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
+               }
+#else
+               if (args > 0) {
+                       *inst++ = REX_W;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
+               }
+               if (args > 1) {
+                       *inst++ = REX_W;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
+               }
+               if (args > 2) {
+                       *inst++ = REX_W | REX_B;
+                       *inst++ = MOV_r_rm;
+                       *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
+               }
+#endif
+       }
+
+       local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
+       compiler->local_size = local_size;
+
+#ifdef _WIN64
+       if (local_size > 1024) {
+               /* Allocate stack for the callback, which grows the stack. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si)));
+               FAIL_IF(!inst);
+               INC_SIZE(4 + (3 + sizeof(sljit_si)));
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_83;
+               *inst++ = MOD_REG | SUB | 4;
+               /* Allocated size for registers must be divisible by 8. */
+               SLJIT_ASSERT(!(saved_register_size & 0x7));
+               /* Keep the allocation aligned to 16 bytes. */
+               if (saved_register_size & 0x8) {
+                       *inst++ = 5 * sizeof(sljit_sw);
+                       local_size -= 5 * sizeof(sljit_sw);
+               } else {
+                       *inst++ = 4 * sizeof(sljit_sw);
+                       local_size -= 4 * sizeof(sljit_sw);
+               }
+               /* Second instruction */
+               SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg);
+               *inst++ = REX_W;
+               *inst++ = MOV_rm_i32;
+               *inst++ = MOD_REG | reg_lmap[SLJIT_R0];
+               *(sljit_si*)inst = local_size;
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+                       || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+               compiler->skip_checks = 1;
+#endif
+               FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+       }
+#endif
+
+       SLJIT_ASSERT(local_size > 0);
+       if (local_size <= 127) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+               FAIL_IF(!inst);
+               INC_SIZE(4);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_83;
+               *inst++ = MOD_REG | SUB | 4;
+               *inst++ = local_size;
+       }
+       else {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
+               FAIL_IF(!inst);
+               INC_SIZE(7);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_81;
+               *inst++ = MOD_REG | SUB | 4;
+               *(sljit_si*)inst = local_size;
+               inst += sizeof(sljit_si);
+       }
+
+#ifdef _WIN64
+       /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
+       if (fscratches >= 6 || fsaveds >= 1) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+               FAIL_IF(!inst);
+               INC_SIZE(5);
+               *inst++ = GROUP_0F;
+               *(sljit_si*)inst = 0x20247429;
+       }
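+       /* The sljit_si constant above is stored little-endian, so together
+          with the GROUP_0F byte the emitted sequence is 0F 29 74 24 20,
+          i.e. exactly the movaps [rsp + 0x20], xmm6 named in the comment. */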
+#endif
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+       sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+       sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+{
+       sljit_si saved_register_size;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+       set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+
+       /* Including the return address saved by the call instruction. */
+       saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+       compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+{
+       sljit_si i, tmp, size;
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+       compiler->flags_saved = 0;
+       FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+
+#ifdef _WIN64
+       /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
+       if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+               FAIL_IF(!inst);
+               INC_SIZE(5);
+               *inst++ = GROUP_0F;
+               *(sljit_si*)inst = 0x20247428;
+       }
+#endif
+
+       SLJIT_ASSERT(compiler->local_size > 0);
+       if (compiler->local_size <= 127) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+               FAIL_IF(!inst);
+               INC_SIZE(4);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_83;
+               *inst++ = MOD_REG | ADD | 4;
+               *inst = compiler->local_size;
+       }
+       else {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 7);
+               FAIL_IF(!inst);
+               INC_SIZE(7);
+               *inst++ = REX_W;
+               *inst++ = GROUP_BINARY_81;
+               *inst++ = MOD_REG | ADD | 4;
+               *(sljit_si*)inst = compiler->local_size;
+       }
+
+       tmp = compiler->scratches;
+       for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
+               size = reg_map[i] >= 8 ? 2 : 1;
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+               INC_SIZE(size);
+               if (reg_map[i] >= 8)
+                       *inst++ = REX_B;
+               POP_REG(reg_lmap[i]);
+       }
+
+       tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+       for (i = tmp; i <= SLJIT_S0; i++) {
+               size = reg_map[i] >= 8 ? 2 : 1;
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+               INC_SIZE(size);
+               if (reg_map[i] >= 8)
+                       *inst++ = REX_B;
+               POP_REG(reg_lmap[i]);
+       }
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+       FAIL_IF(!inst);
+       INC_SIZE(1);
+       RET();
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm)
+{
+       sljit_ub *inst;
+       sljit_si length = 1 + (rex ? 1 : 0) + sizeof(sljit_si);
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + length);
+       FAIL_IF(!inst);
+       INC_SIZE(length);
+       if (rex)
+               *inst++ = rex;
+       *inst++ = opcode;
+       *(sljit_si*)inst = imm;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size,
+       /* The register or immediate operand. */
+       sljit_si a, sljit_sw imma,
+       /* The general operand (not immediate). */
+       sljit_si b, sljit_sw immb)
+{
+       sljit_ub *inst;
+       sljit_ub *buf_ptr;
+       sljit_ub rex = 0;
+       sljit_si flags = size & ~0xf;
+       sljit_si inst_size;
+
+       /* The immediate operand must be 32 bit. */
+       SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
+       /* Both cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
+       /* Size flags not allowed for typed instructions. */
+       SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
+       /* Both size flags cannot be switched on. */
+       SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
+       /* SSE2 and immediate is not possible. */
+       SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
+       SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
+               && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
+               && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+
+       size &= 0xf;
+       inst_size = size;
+
+       if (!compiler->mode32 && !(flags & EX86_NO_REXW))
+               rex |= REX_W;
+       else if (flags & EX86_REX)
+               rex |= REX;
+
+       if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
+               inst_size++;
+       if (flags & EX86_PREF_66)
+               inst_size++;
+
+       /* Calculate size of b. */
+       inst_size += 1; /* mod r/m byte. */
+       if (b & SLJIT_MEM) {
+               if (!(b & OFFS_REG_MASK)) {
+                       if (NOT_HALFWORD(immb)) {
+                               if (emit_load_imm64(compiler, TMP_REG3, immb))
+                                       return NULL;
+                               immb = 0;
+                               if (b & REG_MASK)
+                                       b |= TO_OFFS_REG(TMP_REG3);
+                               else
+                                       b |= TMP_REG3;
+                       }
+                       else if (reg_lmap[b & REG_MASK] == 4)
+                               b |= TO_OFFS_REG(SLJIT_SP);
+               }
+
+               if ((b & REG_MASK) == SLJIT_UNUSED)
+                       inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */
+               else {
+                       if (reg_map[b & REG_MASK] >= 8)
+                               rex |= REX_B;
+
+                       if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
+                               /* Immediate operand. */
+                               if (immb <= 127 && immb >= -128)
+                                       inst_size += sizeof(sljit_sb);
+                               else
+                                       inst_size += sizeof(sljit_si);
+                       }
+                       else if (reg_lmap[b & REG_MASK] == 5)
+                               inst_size += sizeof(sljit_sb);
+
+                       if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
+                               inst_size += 1; /* SIB byte. */
+                               if (reg_map[OFFS_REG(b)] >= 8)
+                                       rex |= REX_X;
+                       }
+               }
+       }
+       else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
+               rex |= REX_B;
+
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BIN_INS) {
+                       if (imma <= 127 && imma >= -128) {
+                               inst_size += 1;
+                               flags |= EX86_BYTE_ARG;
+                       } else
+                               inst_size += 4;
+               }
+               else if (flags & EX86_SHIFT_INS) {
+                       imma &= compiler->mode32 ? 0x1f : 0x3f;
+                       if (imma != 1) {
+                               inst_size++;
+                               flags |= EX86_BYTE_ARG;
+                       }
+               } else if (flags & EX86_BYTE_ARG)
+                       inst_size++;
+               else if (flags & EX86_HALF_ARG)
+                       inst_size += sizeof(short);
+               else
+                       inst_size += sizeof(sljit_si);
+       }
+       else {
+               SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
+               /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
+               if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
+                       rex |= REX_R;
+       }
+
+       if (rex)
+               inst_size++;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
+       PTR_FAIL_IF(!inst);
+
+       /* Encoding the instruction. */
+       INC_SIZE(inst_size);
+       if (flags & EX86_PREF_F2)
+               *inst++ = 0xf2;
+       if (flags & EX86_PREF_F3)
+               *inst++ = 0xf3;
+       if (flags & EX86_PREF_66)
+               *inst++ = 0x66;
+       if (rex)
+               *inst++ = rex;
+       buf_ptr = inst + size;
+
+       /* Encode mod/rm byte. */
+       if (!(flags & EX86_SHIFT_INS)) {
+               if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+                       *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
+
+               if ((a & SLJIT_IMM) || (a == 0))
+                       *buf_ptr = 0;
+               else if (!(flags & EX86_SSE2_OP1))
+                       *buf_ptr = reg_lmap[a] << 3;
+               else
+                       *buf_ptr = a << 3;
+       }
+       else {
+               if (a & SLJIT_IMM) {
+                       if (imma == 1)
+                               *inst = GROUP_SHIFT_1;
+                       else
+                               *inst = GROUP_SHIFT_N;
+               } else
+                       *inst = GROUP_SHIFT_CL;
+               *buf_ptr = 0;
+       }
+
+       if (!(b & SLJIT_MEM))
+               *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
+       else if ((b & REG_MASK) != SLJIT_UNUSED) {
+               if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
+                       if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr |= 0x40;
+                               else
+                                       *buf_ptr |= 0x80;
+                       }
+
+                       if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
+                               *buf_ptr++ |= reg_lmap[b & REG_MASK];
+                       else {
+                               *buf_ptr++ |= 0x04;
+                               *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
+                       }
+
+                       if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
+                               if (immb <= 127 && immb >= -128)
+                                       *buf_ptr++ = immb; /* 8 bit displacement. */
+                               else {
+                                       *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
+                                       buf_ptr += sizeof(sljit_si);
+                               }
+                       }
+               }
+               else {
+                       if (reg_lmap[b & REG_MASK] == 5)
+                               *buf_ptr |= 0x40;
+                       *buf_ptr++ |= 0x04;
+                       *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
+                       if (reg_lmap[b & REG_MASK] == 5)
+                               *buf_ptr++ = 0;
+               }
+       }
+       else {
+               *buf_ptr++ |= 0x04;
+               *buf_ptr++ = 0x25;
+               *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */
+               buf_ptr += sizeof(sljit_si);
+       }
+
+       if (a & SLJIT_IMM) {
+               if (flags & EX86_BYTE_ARG)
+                       *buf_ptr = imma;
+               else if (flags & EX86_HALF_ARG)
+                       *(short*)buf_ptr = imma;
+               else if (!(flags & EX86_SHIFT_INS))
+                       *(sljit_si*)buf_ptr = imma;
+       }
+
+       return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
+}
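+
+/*
+   Note the absolute-address case at the very end: with no base register the
+   operand is encoded as mod/rm 04 + SIB 25 + disp32, because the plain
+   mod/rm 05 form used by the 32-bit code means RIP-relative addressing in
+   64-bit mode (cf. the "avoid RIP based addressing" size calculation above).
+*/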
+
+/* --------------------------------------------------------------------- */
+/*  Call / return instructions                                           */
+/* --------------------------------------------------------------------- */
+
+static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type)
+{
+       sljit_ub *inst;
+
+#ifndef _WIN64
+       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
+       FAIL_IF(!inst);
+       INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
+       if (type >= SLJIT_CALL3) {
+               *inst++ = REX_W;
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
+       }
+       *inst++ = REX_W;
+       *inst++ = MOV_r_rm;
+       *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
+#else
+       SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
+       FAIL_IF(!inst);
+       INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
+       if (type >= SLJIT_CALL3) {
+               *inst++ = REX_W | REX_R;
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
+       }
+       *inst++ = REX_W;
+       *inst++ = MOV_r_rm;
+       *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
+#endif
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       /* For UNUSED dst. Uncommon, but possible. */
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
+       if (FAST_IS_REG(dst)) {
+               if (reg_map[dst] < 8) {
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       POP_REG(reg_lmap[dst]);
+                       return SLJIT_SUCCESS;
+               }
+
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
+               FAIL_IF(!inst);
+               INC_SIZE(2);
+               *inst++ = REX_B;
+               POP_REG(reg_lmap[dst]);
+               return SLJIT_SUCCESS;
+       }
+
+       /* REX_W is not necessary (dst is not an immediate). */
+       compiler->mode32 = 1;
+       inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+       FAIL_IF(!inst);
+       *inst++ = POP_rm;
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
+               FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+               src = TMP_REG1;
+       }
+
+       if (FAST_IS_REG(src)) {
+               if (reg_map[src] < 8) {
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
+                       FAIL_IF(!inst);
+
+                       INC_SIZE(1 + 1);
+                       PUSH_REG(reg_lmap[src]);
+               }
+               else {
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1);
+                       FAIL_IF(!inst);
+
+                       INC_SIZE(2 + 1);
+                       *inst++ = REX_B;
+                       PUSH_REG(reg_lmap[src]);
+               }
+       }
+       else if (src & SLJIT_MEM) {
+               /* REX_W is not necessary (src is not immediate). */
+               compiler->mode32 = 1;
+               inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_FF;
+               *inst |= PUSH_rm;
+
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+       }
+       else {
+               SLJIT_ASSERT(IS_HALFWORD(srcw));
+               /* SLJIT_IMM. */
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
+               FAIL_IF(!inst);
+
+               INC_SIZE(5 + 1);
+               *inst++ = PUSH_i32;
+               *(sljit_si*)inst = srcw;
+               inst += sizeof(sljit_si);
+       }
+
+       RET();
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Extend input                                                         */
+/* --------------------------------------------------------------------- */
+
+static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si dst_r;
+
+       compiler->mode32 = 0;
+
+       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
+               return SLJIT_SUCCESS; /* Empty instruction. */
+
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) {
+                       if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
+                               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
+                               FAIL_IF(!inst);
+                               *inst = MOV_rm_i32;
+                               return SLJIT_SUCCESS;
+                       }
+                       return emit_load_imm64(compiler, dst, srcw);
+               }
+               compiler->mode32 = 1;
+               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_i32;
+               compiler->mode32 = 0;
+               return SLJIT_SUCCESS;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
+               dst_r = src;
+       else {
+               if (sign) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
+                       FAIL_IF(!inst);
+                       *inst++ = MOVSXD_r_rm;
+               } else {
+                       compiler->mode32 = 1;
+                       FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
+                       compiler->mode32 = 0;
+               }
+       }
+
+       if (dst & SLJIT_MEM) {
+               compiler->mode32 = 1;
+               inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_r;
+               compiler->mode32 = 0;
+       }
+
+       return SLJIT_SUCCESS;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c b/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c
new file mode 100644 (file)
index 0000000..22a163f
--- /dev/null
@@ -0,0 +1,2925 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+       return "x86" SLJIT_CPUINFO;
+}
+
+/*
+   32b register indexes:
+     0 - EAX
+     1 - ECX
+     2 - EDX
+     3 - EBX
+     4 - none
+     5 - EBP
+     6 - ESI
+     7 - EDI
+*/
+
+/*
+   64b register indexes:
+     0 - RAX
+     1 - RCX
+     2 - RDX
+     3 - RBX
+     4 - none
+     5 - RBP
+     6 - RSI
+     7 - RDI
+     8 - R8   - from here on, a REX prefix is required
+     9 - R9
+    10 - R10
+    11 - R11
+    12 - R12
+    13 - R13
+    14 - R14
+    15 - R15
+*/
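+
+/* Illustrative sketch (editor's note, not used by the emitter): encoding
+   "mov rax, r8" needs a REX prefix because operand index 8 does not fit
+   in the 3-bit r/m field of the ModRM byte:
+     0x49  REX_W | REX_B
+     0x8b  MOV_r_rm
+     0xc0  MOD_REG | (rax << 3) | (r8 & 0x7)
+   i.e. the byte sequence 49 8b c0. */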
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
+/* Last register + 1. */
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
+       0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
+};
+
+#define CHECK_EXTRA_REGS(p, w, do) \
+       if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
+               w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
+               p = SLJIT_MEM1(SLJIT_SP); \
+               do; \
+       }
+
+#else /* SLJIT_CONFIG_X86_32 */
+
+/* Last register + 1. */
+#define TMP_REG1       (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2       (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3       (SLJIT_NUMBER_OF_REGISTERS + 4)
+
+/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
+   Note: avoid using r12 and r13 for memory addressing;
+   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
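+/* For illustration only (editor's note): with mod == 0,
+     mov rax, [r12]  ->  49 8b 04 24   (rm == 0b100 forces a SIB byte)
+     mov rax, [r13]  ->  49 8b 45 00   (rm == 0b101 would mean RIP-relative,
+                                        so a zero 8-bit displacement is used)
+   while e.g. mov rax, [rbx] is simply 48 8b 03. */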
+#ifndef _WIN64
+/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+       0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
+};
+/* low-map. reg_map & 0x7. */
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+       0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
+};
+#else
+/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+       0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
+};
+/* low-map. reg_map & 0x7. */
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+       0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
+};
+#endif
+
+#define REX_W          0x48
+#define REX_R          0x44
+#define REX_X          0x42
+#define REX_B          0x41
+#define REX            0x40
+
+#ifndef _WIN64
+#define HALFWORD_MAX 0x7fffffffl
+#define HALFWORD_MIN -0x80000000l
+#else
+#define HALFWORD_MAX 0x7fffffffll
+#define HALFWORD_MIN -0x80000000ll
+#endif
+
+#define IS_HALFWORD(x)         ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
+#define NOT_HALFWORD(x)                ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
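+
+/* Example (editor's note): IS_HALFWORD(0x7fffffff) holds, so the value fits
+   the sign-extended 32-bit immediate field of most instructions; 0x80000000
+   does not, and the emitter falls back to emit_load_imm64() for it. */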
+
+#define CHECK_EXTRA_REGS(p, w, do)
+
+#endif /* SLJIT_CONFIG_X86_32 */
+
+#define TMP_FREG       (0)
+
+/* Size flags for emit_x86_instruction: */
+#define EX86_BIN_INS           0x0010
+#define EX86_SHIFT_INS         0x0020
+#define EX86_REX               0x0040
+#define EX86_NO_REXW           0x0080
+#define EX86_BYTE_ARG          0x0100
+#define EX86_HALF_ARG          0x0200
+#define EX86_PREF_66           0x0400
+#define EX86_PREF_F2           0x0800
+#define EX86_PREF_F3           0x1000
+#define EX86_SSE2_OP1          0x2000
+#define EX86_SSE2_OP2          0x4000
+#define EX86_SSE2              (EX86_SSE2_OP1 | EX86_SSE2_OP2)
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+#define ADD            (/* BINARY */ 0 << 3)
+#define ADD_EAX_i32    0x05
+#define ADD_r_rm       0x03
+#define ADD_rm_r       0x01
+#define ADDSD_x_xm     0x58
+#define ADC            (/* BINARY */ 2 << 3)
+#define ADC_EAX_i32    0x15
+#define ADC_r_rm       0x13
+#define ADC_rm_r       0x11
+#define AND            (/* BINARY */ 4 << 3)
+#define AND_EAX_i32    0x25
+#define AND_r_rm       0x23
+#define AND_rm_r       0x21
+#define ANDPD_x_xm     0x54
+#define BSR_r_rm       (/* GROUP_0F */ 0xbd)
+#define CALL_i32       0xe8
+#define CALL_rm                (/* GROUP_FF */ 2 << 3)
+#define CDQ            0x99
+#define CMOVNE_r_rm    (/* GROUP_0F */ 0x45)
+#define CMP            (/* BINARY */ 7 << 3)
+#define CMP_EAX_i32    0x3d
+#define CMP_r_rm       0x3b
+#define CMP_rm_r       0x39
+#define CVTPD2PS_x_xm  0x5a
+#define CVTSI2SD_x_rm  0x2a
+#define CVTTSD2SI_r_xm 0x2c
+#define DIV            (/* GROUP_F7 */ 6 << 3)
+#define DIVSD_x_xm     0x5e
+#define INT3           0xcc
+#define IDIV           (/* GROUP_F7 */ 7 << 3)
+#define IMUL           (/* GROUP_F7 */ 5 << 3)
+#define IMUL_r_rm      (/* GROUP_0F */ 0xaf)
+#define IMUL_r_rm_i8   0x6b
+#define IMUL_r_rm_i32  0x69
+#define JE_i8          0x74
+#define JNE_i8         0x75
+#define JMP_i8         0xeb
+#define JMP_i32                0xe9
+#define JMP_rm         (/* GROUP_FF */ 4 << 3)
+#define LEA_r_m                0x8d
+#define MOV_r_rm       0x8b
+#define MOV_r_i32      0xb8
+#define MOV_rm_r       0x89
+#define MOV_rm_i32     0xc7
+#define MOV_rm8_i8     0xc6
+#define MOV_rm8_r8     0x88
+#define MOVSD_x_xm     0x10
+#define MOVSD_xm_x     0x11
+#define MOVSXD_r_rm    0x63
+#define MOVSX_r_rm8    (/* GROUP_0F */ 0xbe)
+#define MOVSX_r_rm16   (/* GROUP_0F */ 0xbf)
+#define MOVZX_r_rm8    (/* GROUP_0F */ 0xb6)
+#define MOVZX_r_rm16   (/* GROUP_0F */ 0xb7)
+#define MUL            (/* GROUP_F7 */ 4 << 3)
+#define MULSD_x_xm     0x59
+#define NEG_rm         (/* GROUP_F7 */ 3 << 3)
+#define NOP            0x90
+#define NOT_rm         (/* GROUP_F7 */ 2 << 3)
+#define OR             (/* BINARY */ 1 << 3)
+#define OR_r_rm                0x0b
+#define OR_EAX_i32     0x0d
+#define OR_rm_r                0x09
+#define OR_rm8_r8      0x08
+#define POP_r          0x58
+#define POP_rm         0x8f
+#define POPF           0x9d
+#define PUSH_i32       0x68
+#define PUSH_r         0x50
+#define PUSH_rm                (/* GROUP_FF */ 6 << 3)
+#define PUSHF          0x9c
+#define RET_near       0xc3
+#define RET_i16                0xc2
+#define SBB            (/* BINARY */ 3 << 3)
+#define SBB_EAX_i32    0x1d
+#define SBB_r_rm       0x1b
+#define SBB_rm_r       0x19
+#define SAR            (/* SHIFT */ 7 << 3)
+#define SHL            (/* SHIFT */ 4 << 3)
+#define SHR            (/* SHIFT */ 5 << 3)
+#define SUB            (/* BINARY */ 5 << 3)
+#define SUB_EAX_i32    0x2d
+#define SUB_r_rm       0x2b
+#define SUB_rm_r       0x29
+#define SUBSD_x_xm     0x5c
+#define TEST_EAX_i32   0xa9
+#define TEST_rm_r      0x85
+#define UCOMISD_x_xm   0x2e
+#define UNPCKLPD_x_xm  0x14
+#define XCHG_EAX_r     0x90
+#define XCHG_r_rm      0x87
+#define XOR            (/* BINARY */ 6 << 3)
+#define XOR_EAX_i32    0x35
+#define XOR_r_rm       0x33
+#define XOR_rm_r       0x31
+#define XORPD_x_xm     0x57
+
+#define GROUP_0F       0x0f
+#define GROUP_F7       0xf7
+#define GROUP_FF       0xff
+#define GROUP_BINARY_81        0x81
+#define GROUP_BINARY_83        0x83
+#define GROUP_SHIFT_1  0xd1
+#define GROUP_SHIFT_N  0xc1
+#define GROUP_SHIFT_CL 0xd3
+
+#define MOD_REG                0xc0
+#define MOD_DISP8      0x40
+
+#define INC_SIZE(s)                    (*inst++ = (s), compiler->size += (s))
+
+#define PUSH_REG(r)                    (*inst++ = (PUSH_r + (r)))
+#define POP_REG(r)                     (*inst++ = (POP_r + (r)))
+#define RET()                          (*inst++ = (RET_near))
+#define RET_I16(n)                     (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
+/* r32, r/m32 */
+#define MOV_RM(mod, reg, rm)           (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
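+/* Example (editor's note): MOV_RM(0x3, 0, 1) emits the bytes 8b c1, i.e.
+   "mov eax, ecx", since mod == 3 selects register-direct addressing. */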
+
+/* These static variables only cache detected, built-in CPU features, so
+   multithreading is harmless: if several threads detect the features at
+   the same time, they simply overwrite them with the same values. */
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+static sljit_si cpu_has_sse2 = -1;
+#endif
+static sljit_si cpu_has_cmov = -1;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+#include <intrin.h>
+#endif
+
+static void get_cpu_features(void)
+{
+       sljit_ui features;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+       int CPUInfo[4];
+       __cpuid(CPUInfo, 1);
+       features = (sljit_ui)CPUInfo[3];
+
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
+
+       /* AT&T syntax. */
+       __asm__ (
+               "movl $0x1, %%eax\n"
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               /* On x86-32, there is no red zone, so this
+                  should work (no need for a local variable). */
+               "push %%ebx\n"
+#endif
+               "cpuid\n"
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               "pop %%ebx\n"
+#endif
+               "movl %%edx, %0\n"
+               : "=g" (features)
+               :
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               : "%eax", "%ecx", "%edx"
+#else
+               : "%rax", "%rbx", "%rcx", "%rdx"
+#endif
+       );
+
+#else /* _MSC_VER && _MSC_VER >= 1400 */
+
+       /* Intel syntax. */
+       __asm {
+               mov eax, 1
+               cpuid
+               mov features, edx
+       }
+
+#endif /* _MSC_VER && _MSC_VER >= 1400 */
+
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+       cpu_has_sse2 = (features >> 26) & 0x1;
+#endif
+       cpu_has_cmov = (features >> 15) & 0x1;
+}
+
+static sljit_ub get_jump_code(sljit_si type)
+{
+       switch (type) {
+       case SLJIT_EQUAL:
+       case SLJIT_D_EQUAL:
+               return 0x84 /* je */;
+
+       case SLJIT_NOT_EQUAL:
+       case SLJIT_D_NOT_EQUAL:
+               return 0x85 /* jne */;
+
+       case SLJIT_LESS:
+       case SLJIT_D_LESS:
+               return 0x82 /* jc */;
+
+       case SLJIT_GREATER_EQUAL:
+       case SLJIT_D_GREATER_EQUAL:
+               return 0x83 /* jae */;
+
+       case SLJIT_GREATER:
+       case SLJIT_D_GREATER:
+               return 0x87 /* jnbe */;
+
+       case SLJIT_LESS_EQUAL:
+       case SLJIT_D_LESS_EQUAL:
+               return 0x86 /* jbe */;
+
+       case SLJIT_SIG_LESS:
+               return 0x8c /* jl */;
+
+       case SLJIT_SIG_GREATER_EQUAL:
+               return 0x8d /* jnl */;
+
+       case SLJIT_SIG_GREATER:
+               return 0x8f /* jnle */;
+
+       case SLJIT_SIG_LESS_EQUAL:
+               return 0x8e /* jle */;
+
+       case SLJIT_OVERFLOW:
+       case SLJIT_MUL_OVERFLOW:
+               return 0x80 /* jo */;
+
+       case SLJIT_NOT_OVERFLOW:
+       case SLJIT_MUL_NOT_OVERFLOW:
+               return 0x81 /* jno */;
+
+       case SLJIT_D_UNORDERED:
+               return 0x8a /* jp */;
+
+       case SLJIT_D_ORDERED:
+               return 0x8b /* jpo */;
+       }
+       return 0;
+}
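+
+/* Editor's note: these are the second opcode bytes of the two-byte
+   "0x0f 0x8x jcc rel32" forms. The one-byte "jcc rel8" short forms are
+   exactly 0x10 smaller (e.g. jne is 0f 85 long, 75 short), which is why
+   generate_near_jump_code() below uses "get_jump_code(type) - 0x10". */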
+
+static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
+#endif
+
+static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
+{
+       sljit_si short_jump;
+       sljit_uw label_addr;
+
+       if (jump->flags & JUMP_LABEL)
+               label_addr = (sljit_uw)(code + jump->u.label->size);
+       else
+               label_addr = jump->u.target;
+       short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
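+       /* The "+ 2" is the size of the short form (one opcode byte plus a
+          one-byte displacement); rel8 is measured from the end of that form. */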
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
+               return generate_far_jump_code(jump, code_ptr, type);
+#endif
+
+       if (type == SLJIT_JUMP) {
+               if (short_jump)
+                       *code_ptr++ = JMP_i8;
+               else
+                       *code_ptr++ = JMP_i32;
+               jump->addr++;
+       }
+       else if (type >= SLJIT_FAST_CALL) {
+               short_jump = 0;
+               *code_ptr++ = CALL_i32;
+               jump->addr++;
+       }
+       else if (short_jump) {
+               *code_ptr++ = get_jump_code(type) - 0x10;
+               jump->addr++;
+       }
+       else {
+               *code_ptr++ = GROUP_0F;
+               *code_ptr++ = get_jump_code(type);
+               jump->addr += 2;
+       }
+
+       if (short_jump) {
+               jump->flags |= PATCH_MB;
+               code_ptr += sizeof(sljit_sb);
+       } else {
+               jump->flags |= PATCH_MW;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               code_ptr += sizeof(sljit_sw);
+#else
+               code_ptr += sizeof(sljit_si);
+#endif
+       }
+
+       return code_ptr;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+       struct sljit_memory_fragment *buf;
+       sljit_ub *code;
+       sljit_ub *code_ptr;
+       sljit_ub *buf_ptr;
+       sljit_ub *buf_end;
+       sljit_ub len;
+
+       struct sljit_label *label;
+       struct sljit_jump *jump;
+       struct sljit_const *const_;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_generate_code(compiler));
+       reverse_buf(compiler);
+
+       /* Second code generation pass. */
+       code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
+       PTR_FAIL_WITH_EXEC_IF(code);
+       buf = compiler->buf;
+
+       code_ptr = code;
+       label = compiler->labels;
+       jump = compiler->jumps;
+       const_ = compiler->consts;
+       do {
+               buf_ptr = buf->memory;
+               buf_end = buf_ptr + buf->used_size;
+               do {
+                       len = *buf_ptr++;
+                       if (len > 0) {
+                               /* The code is already generated. */
+                               SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
+                               code_ptr += len;
+                               buf_ptr += len;
+                       }
+                       else {
+                               if (*buf_ptr >= 4) {
+                                       jump->addr = (sljit_uw)code_ptr;
+                                       if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
+                                               code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
+                                       else
+                                               code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
+                                       jump = jump->next;
+                               }
+                               else if (*buf_ptr == 0) {
+                                       label->addr = (sljit_uw)code_ptr;
+                                       label->size = code_ptr - code;
+                                       label = label->next;
+                               }
+                               else if (*buf_ptr == 1) {
+                                       const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
+                                       const_ = const_->next;
+                               }
+                               else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                                       *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
+                                       buf_ptr++;
+                                       *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
+                                       code_ptr += sizeof(sljit_sw);
+                                       buf_ptr += sizeof(sljit_sw) - 1;
+#else
+                                       code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
+                                       buf_ptr += sizeof(sljit_sw);
+#endif
+                               }
+                               buf_ptr++;
+                       }
+               } while (buf_ptr < buf_end);
+               SLJIT_ASSERT(buf_ptr == buf_end);
+               buf = buf->next;
+       } while (buf);
+
+       SLJIT_ASSERT(!label);
+       SLJIT_ASSERT(!jump);
+       SLJIT_ASSERT(!const_);
+
+       jump = compiler->jumps;
+       while (jump) {
+               if (jump->flags & PATCH_MB) {
+                       SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
+                       *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
+               } else if (jump->flags & PATCH_MW) {
+                       if (jump->flags & JUMP_LABEL) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                               *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
+#else
+                               SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
+                               *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
+#endif
+                       }
+                       else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                               *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
+#else
+                               SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
+                               *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
+#endif
+                       }
+               }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               else if (jump->flags & PATCH_MD)
+                       *(sljit_sw*)jump->addr = jump->u.label->addr;
+#endif
+
+               jump = jump->next;
+       }
+
+       /* Some space may be wasted, since short jumps need fewer bytes
+          than the reserved worst case. */
+       SLJIT_ASSERT(code_ptr <= code + compiler->size);
+       compiler->error = SLJIT_ERR_COMPILED;
+       compiler->executable_size = code_ptr - code;
+       return (void*)code;
+}
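+
+#if 0
+/* Editor's sketch, never compiled: how the generated code is typically
+   consumed. The entry points and argument orders below follow the bundled
+   sljitLir.h and are assumptions for illustration, not part of this file. */
+static sljit_sw add_one(sljit_sw arg)
+{
+       struct sljit_compiler *compiler = sljit_create_compiler();
+       sljit_sw (SLJIT_CALL *fn)(sljit_sw);
+       sljit_sw result;
+       void *code;
+
+       /* options, args, scratches, saveds, fscratches, fsaveds, local_size */
+       sljit_emit_enter(compiler, 0, 1, 1, 1, 0, 0, 0);
+       /* After emit_enter, S0 holds the first argument. */
+       sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, 1);
+       sljit_emit_return(compiler, SLJIT_MOV, SLJIT_S0, 0);
+
+       code = sljit_generate_code(compiler);
+       sljit_free_compiler(compiler);
+
+       fn = (sljit_sw (SLJIT_CALL *)(sljit_sw))code;
+       result = fn(arg);
+       sljit_free_code(code);
+       return result;
+}
+#endif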
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w);
+
+static sljit_si emit_mov(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw);
+
+static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
+{
+       sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+       FAIL_IF(!inst);
+       INC_SIZE(5);
+#else
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
+       FAIL_IF(!inst);
+       INC_SIZE(6);
+       *inst++ = REX_W;
+#endif
+       *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
+       *inst++ = 0x64;
+       *inst++ = 0x24;
+       *inst++ = (sljit_ub)sizeof(sljit_sw);
+       *inst++ = PUSHF;
+       compiler->flags_saved = 1;
+       return SLJIT_SUCCESS;
+}
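+
+/* Editor's note on the sequence above: the lea first "pops" one word, then
+   pushf re-fills it, so EFLAGS ends up in the word at [esp/rsp] and the
+   stack pointer is unchanged. emit_restore_flags() below mirrors this. */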
+
+static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
+{
+       sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+       FAIL_IF(!inst);
+       INC_SIZE(5);
+       *inst++ = POPF;
+#else
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
+       FAIL_IF(!inst);
+       INC_SIZE(6);
+       *inst++ = POPF;
+       *inst++ = REX_W;
+#endif
+       *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
+       *inst++ = 0x64;
+       *inst++ = 0x24;
+       *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
+       compiler->flags_saved = keep_flags;
+       return SLJIT_SUCCESS;
+}
+
+#ifdef _WIN32
+#include <malloc.h>
+
+static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
+{
+       /* Workaround for calling the internal _chkstk() function on Windows.
+       This function touches all 4K pages belonging to the requested stack
+       space, whose size is passed in local_size. This is necessary on
+       Windows, where the stack can only grow in 4K steps. If the stack is
+       already large enough, the call just burns CPU cycles; since that
+       cannot be known in advance, it must always be called. I think this
+       is a bad design in general, even if it has its reasons. */
+       *(volatile sljit_si*)alloca(local_size) = 0;
+}
+
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#include "sljitNativeX86_32.c"
+#else
+#include "sljitNativeX86_64.c"
+#endif
+
+static sljit_si emit_mov(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+
+       if (dst == SLJIT_UNUSED) {
+               /* No destination; no need to set up flags. */
+               if (src & SLJIT_MEM) {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
+                       FAIL_IF(!inst);
+                       *inst = MOV_r_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+       if (FAST_IS_REG(src)) {
+               inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_r;
+               return SLJIT_SUCCESS;
+       }
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
+#else
+                       if (!compiler->mode32) {
+                               if (NOT_HALFWORD(srcw))
+                                       return emit_load_imm64(compiler, dst, srcw);
+                       }
+                       else
+                               return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
+#endif
+               }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
+                       FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm_r;
+                       return SLJIT_SUCCESS;
+               }
+#endif
+               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_i32;
+               return SLJIT_SUCCESS;
+       }
+       if (FAST_IS_REG(dst)) {
+               inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst = MOV_r_rm;
+               return SLJIT_SUCCESS;
+       }
+
+       /* Memory to memory move. Requires two instructions. */
+       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
+       FAIL_IF(!inst);
+       *inst = MOV_r_rm;
+       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+       FAIL_IF(!inst);
+       *inst = MOV_rm_r;
+       return SLJIT_SUCCESS;
+}
+
+#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
+       FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+{
+       sljit_ub *inst;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si size;
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op0(compiler, op));
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_BREAKPOINT:
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+               *inst = INT3;
+               break;
+       case SLJIT_NOP:
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1);
+               *inst = NOP;
+               break;
+       case SLJIT_LUMUL:
+       case SLJIT_LSMUL:
+       case SLJIT_LUDIV:
+       case SLJIT_LSDIV:
+               compiler->flags_saved = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifdef _WIN64
+               SLJIT_COMPILE_ASSERT(
+                       reg_map[SLJIT_R0] == 0
+                       && reg_map[SLJIT_R1] == 2
+                       && reg_map[TMP_REG1] > 7,
+                       invalid_register_assignment_for_div_mul);
+#else
+               SLJIT_COMPILE_ASSERT(
+                       reg_map[SLJIT_R0] == 0
+                       && reg_map[SLJIT_R1] < 7
+                       && reg_map[TMP_REG1] == 2,
+                       invalid_register_assignment_for_div_mul);
+#endif
+               compiler->mode32 = op & SLJIT_INT_OP;
+#endif
+
+               op = GET_OPCODE(op);
+               if (op == SLJIT_LUDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+                       EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
+#else
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
+#endif
+                       FAIL_IF(!inst);
+                       *inst = XOR_r_rm;
+               }
+
+               if (op == SLJIT_LSDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+                       EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       *inst = CDQ;
+#else
+                       if (compiler->mode32) {
+                               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                               FAIL_IF(!inst);
+                               INC_SIZE(1);
+                               *inst = CDQ;
+                       } else {
+                               inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
+                               FAIL_IF(!inst);
+                               INC_SIZE(2);
+                               *inst++ = REX_W;
+                               *inst = CDQ;
+                       }
+#endif
+               }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
+               FAIL_IF(!inst);
+               INC_SIZE(2);
+               *inst++ = GROUP_F7;
+               *inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
+#else
+#ifdef _WIN64
+               size = (!compiler->mode32 || op >= SLJIT_LUDIV) ? 3 : 2;
+#else
+               size = (!compiler->mode32) ? 3 : 2;
+#endif
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+               FAIL_IF(!inst);
+               INC_SIZE(size);
+#ifdef _WIN64
+               if (!compiler->mode32)
+                       *inst++ = REX_W | ((op >= SLJIT_LUDIV) ? REX_B : 0);
+               else if (op >= SLJIT_LUDIV)
+                       *inst++ = REX_B;
+               *inst++ = GROUP_F7;
+               *inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
+#else
+               if (!compiler->mode32)
+                       *inst++ = REX_W;
+               *inst++ = GROUP_F7;
+               *inst = MOD_REG | reg_map[SLJIT_R1];
+#endif
+#endif
+               switch (op) {
+               case SLJIT_LUMUL:
+                       *inst |= MUL;
+                       break;
+               case SLJIT_LSMUL:
+                       *inst |= IMUL;
+                       break;
+               case SLJIT_LUDIV:
+                       *inst |= DIV;
+                       break;
+               case SLJIT_LSDIV:
+                       *inst |= IDIV;
+                       break;
+               }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
+               EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
+#endif
+               break;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+#define ENCODE_PREFIX(prefix) \
+       do { \
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
+               FAIL_IF(!inst); \
+               INC_SIZE(1); \
+               *inst = (prefix); \
+       } while (0)
+
+static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si dst_r;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       sljit_si work_r;
+#endif
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+#endif
+
+       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
+               return SLJIT_SUCCESS; /* Empty instruction. */
+
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
+#else
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm_i32;
+                       return SLJIT_SUCCESS;
+#endif
+               }
+               inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm8_i8;
+               return SLJIT_SUCCESS;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (reg_map[src] >= 4) {
+                       SLJIT_ASSERT(dst_r == TMP_REG1);
+                       EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+               } else
+                       dst_r = src;
+#else
+               dst_r = src;
+#endif
+       }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
+               /* src, dst are registers. */
+               SLJIT_ASSERT(SLOW_IS_REG(dst));
+               if (reg_map[dst] < 4) {
+                       if (dst != src)
+                               EMIT_MOV(compiler, dst, 0, src, 0);
+                       inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst++ = GROUP_0F;
+                       *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
+               }
+               else {
+                       if (dst != src)
+                               EMIT_MOV(compiler, dst, 0, src, 0);
+                       if (sign) {
+                               /* shl reg, 24 */
+                               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
+                               FAIL_IF(!inst);
+                               *inst |= SHL;
+                               /* sar reg, 24 */
+                               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
+                               FAIL_IF(!inst);
+                               *inst |= SAR;
+                       }
+                       else {
+                               inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
+                               FAIL_IF(!inst);
+                               *(inst + 1) |= AND;
+                       }
+               }
+               return SLJIT_SUCCESS;
+       }
+#endif
+       else {
+               /* src is either a memory operand or, on x86-32, a register with
+                  reg_map[src] < 4 (only those registers have low-byte forms). */
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
+       }
+
+       if (dst & SLJIT_MEM) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (dst_r == TMP_REG1) {
+                       /* Find an unused register whose reg_map value is < 4. */
+                       if ((dst & REG_MASK) == SLJIT_R0) {
+                               if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
+                                       work_r = SLJIT_R2;
+                               else
+                                       work_r = SLJIT_R1;
+                       }
+                       else {
+                               if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
+                                       work_r = SLJIT_R0;
+                               else if ((dst & REG_MASK) == SLJIT_R1)
+                                       work_r = SLJIT_R2;
+                               else
+                                       work_r = SLJIT_R1;
+                       }
+
+                       if (work_r == SLJIT_R0) {
+                               ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
+                       }
+                       else {
+                               inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
+                               FAIL_IF(!inst);
+                               *inst = XCHG_r_rm;
+                       }
+
+                       inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm8_r8;
+
+                       if (work_r == SLJIT_R0) {
+                               ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
+                       }
+                       else {
+                               inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
+                               FAIL_IF(!inst);
+                               *inst = XCHG_r_rm;
+                       }
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm8_r8;
+               }
+#else
+               inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm8_r8;
+#endif
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si dst_r;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+#endif
+
+       if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
+               return SLJIT_SUCCESS; /* Empty instruction. */
+
+       if (src & SLJIT_IMM) {
+               if (FAST_IS_REG(dst)) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
+#else
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst = MOV_rm_i32;
+                       return SLJIT_SUCCESS;
+#endif
+               }
+               inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_i32;
+               return SLJIT_SUCCESS;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
+               dst_r = src;
+       else {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
+       }
+
+       if (dst & SLJIT_MEM) {
+               inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = MOV_rm_r;
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+
+       if (dst == SLJIT_UNUSED) {
+               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_F7;
+               *inst |= opcode;
+               return SLJIT_SUCCESS;
+       }
+       if (dst == src && dstw == srcw) {
+               /* Same input and output */
+               inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_F7;
+               *inst |= opcode;
+               return SLJIT_SUCCESS;
+       }
+       if (FAST_IS_REG(dst)) {
+               EMIT_MOV(compiler, dst, 0, src, srcw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_F7;
+               *inst |= opcode;
+               return SLJIT_SUCCESS;
+       }
+       EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+       inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_F7;
+       *inst |= opcode;
+       EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       return SLJIT_SUCCESS;
+}
+
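+/* Editor's note: x86 NOT does not modify EFLAGS, so an "or reg, reg" is
+   emitted after it purely to set the zero flag from the result. */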
+static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+
+       if (dst == SLJIT_UNUSED) {
+               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_F7;
+               *inst |= NOT_rm;
+               inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst = OR_r_rm;
+               return SLJIT_SUCCESS;
+       }
+       if (FAST_IS_REG(dst)) {
+               EMIT_MOV(compiler, dst, 0, src, srcw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_F7;
+               *inst |= NOT_rm;
+               inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
+               FAIL_IF(!inst);
+               *inst = OR_r_rm;
+               return SLJIT_SUCCESS;
+       }
+       EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+       inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_F7;
+       *inst |= NOT_rm;
+       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
+       FAIL_IF(!inst);
+       *inst = OR_r_rm;
+       EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       return SLJIT_SUCCESS;
+}
+
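+/* Editor's note: clz is derived from BSR below. bsr yields the index of the
+   highest set bit and sets ZF only for a zero input, so dst is preloaded
+   with 32 + 31 (or 64 + 63) to survive the cmovne in the zero case, and the
+   final xor with 31 (or 63) turns the bit index into a leading-zero count:
+   bsr(1) == 0, 0 ^ 31 == 31 == clz(1); zero input gives 63 ^ 31 == 32. */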
+static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si dst_r;
+
+       SLJIT_UNUSED_ARG(op_flags);
+       if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
+               /* Just set the zero flag. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+               inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_F7;
+               *inst |= NOT_rm;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
+#else
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
+#endif
+               FAIL_IF(!inst);
+               *inst |= SHR;
+               return SLJIT_SUCCESS;
+       }
+
+       if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+               EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst = BSR_r_rm;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       if (FAST_IS_REG(dst))
+               dst_r = dst;
+       else {
+               /* Find an unused temporary register. */
+               if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
+                       dst_r = SLJIT_R0;
+               else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
+                       dst_r = SLJIT_R1;
+               else
+                       dst_r = SLJIT_R2;
+               EMIT_MOV(compiler, dst, dstw, dst_r, 0);
+       }
+       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
+#else
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+       compiler->mode32 = 0;
+       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
+       compiler->mode32 = op_flags & SLJIT_INT_OP;
+#endif
+
+       if (cpu_has_cmov == -1)
+               get_cpu_features();
+
+       if (cpu_has_cmov) {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = CMOVNE_r_rm;
+       } else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+               FAIL_IF(!inst);
+               INC_SIZE(4);
+
+               *inst++ = JE_i8;
+               *inst++ = 2;
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
+#else
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+               FAIL_IF(!inst);
+               INC_SIZE(5);
+
+               *inst++ = JE_i8;
+               *inst++ = 3;
+               *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
+               *inst++ = MOV_r_rm;
+               *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
+#endif
+       }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
+#else
+       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
+#endif
+       FAIL_IF(!inst);
+       *(inst + 1) |= XOR;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       if (dst & SLJIT_MEM) {
+               inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
+               FAIL_IF(!inst);
+               *inst = XCHG_r_rm;
+       }
+#else
+       if (dst & SLJIT_MEM)
+               EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
+#endif
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_ub* inst;
+       sljit_si update = 0;
+       sljit_si op_flags = GET_ALL_FLAGS(op);
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       sljit_si dst_is_ereg = 0;
+       sljit_si src_is_ereg = 0;
+#else
+#      define src_is_ereg 0
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
+       CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = op_flags & SLJIT_INT_OP;
+#endif
+
+       op = GET_OPCODE(op);
+       if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               compiler->mode32 = 0;
+#endif
+
+               if (op_flags & SLJIT_INT_OP) {
+                       if (FAST_IS_REG(src) && src == dst) {
+                               if (!TYPE_CAST_NEEDED(op))
+                                       return SLJIT_SUCCESS;
+                       }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
+                               op = SLJIT_MOV_UI;
+                       if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
+                               op = SLJIT_MOVU_UI;
+                       if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOV_SI;
+                       if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
+                               op = SLJIT_MOVU_SI;
+#endif
+               }
+
+               SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
+               if (op >= SLJIT_MOVU) {
+                       update = 1;
+                       op -= 8;
+               }
+
+               if (src & SLJIT_IMM) {
+                       switch (op) {
+                       case SLJIT_MOV_UB:
+                               srcw = (sljit_ub)srcw;
+                               break;
+                       case SLJIT_MOV_SB:
+                               srcw = (sljit_sb)srcw;
+                               break;
+                       case SLJIT_MOV_UH:
+                               srcw = (sljit_uh)srcw;
+                               break;
+                       case SLJIT_MOV_SH:
+                               srcw = (sljit_sh)srcw;
+                               break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       case SLJIT_MOV_UI:
+                               srcw = (sljit_ui)srcw;
+                               break;
+                       case SLJIT_MOV_SI:
+                               srcw = (sljit_si)srcw;
+                               break;
+#endif
+                       }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                       if (SLJIT_UNLIKELY(dst_is_ereg))
+                               return emit_mov(compiler, dst, dstw, src, srcw);
+#endif
+               }
+
+               if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
+                       inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+                       src &= SLJIT_MEM | 0xf;
+                       srcw = 0;
+               }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
+                       SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
+                       dst = TMP_REG1;
+               }
+#endif
+
+               switch (op) {
+               case SLJIT_MOV:
+               case SLJIT_MOV_P:
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               case SLJIT_MOV_UI:
+               case SLJIT_MOV_SI:
+#endif
+                       FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_UB:
+                       FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_SB:
+                       FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_UH:
+                       FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_SH:
+                       FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
+                       break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               case SLJIT_MOV_UI:
+                       FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
+                       break;
+               case SLJIT_MOV_SI:
+                       FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
+                       break;
+#endif
+               }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
+                       return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
+#endif
+
+               if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
+                       inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
+               compiler->flags_saved = 0;
+
+       switch (op) {
+       case SLJIT_NOT:
+               if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
+                       return emit_not_with_flags(compiler, dst, dstw, src, srcw);
+               return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
+
+       case SLJIT_NEG:
+               if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+               return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
+
+       case SLJIT_CLZ:
+               if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+               return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
+       }
+
+       return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#      undef src_is_ereg
+#endif
+}
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
+       if (IS_HALFWORD(immw) || compiler->mode32) { \
+               inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
+               FAIL_IF(!inst); \
+               *(inst + 1) |= (op_imm); \
+       } \
+       else { \
+               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
+               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
+               FAIL_IF(!inst); \
+               *inst = (op_mr); \
+       }
+
+#define BINARY_EAX_IMM(op_eax_imm, immw) \
+       FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
+
+#else
+
+#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
+       inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
+       FAIL_IF(!inst); \
+       *(inst + 1) |= (op_imm);
+
+#define BINARY_EAX_IMM(op_eax_imm, immw) \
+       FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
+
+#endif
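+
+/* Example (editor's note, illustrative only): BINARY_IMM(ADD, ADD_rm_r, 1,
+   SLJIT_R0, 0) emits an "add eax/rax, 1"; on 64-bit targets an immediate
+   that fails IS_HALFWORD() is first loaded into TMP_REG2 and applied with
+   the register form instead. */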
+
+static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+
+       if (dst == SLJIT_UNUSED) {
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (dst == src1 && dstw == src1w) {
+               if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+                       if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
+#endif
+                               BINARY_EAX_IMM(op_eax_imm, src2w);
+                       }
+                       else {
+                               BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
+                       }
+               }
+               else if (FAST_IS_REG(dst)) {
+                       inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               else if (FAST_IS_REG(src2)) {
+                       /* Special exception for sljit_emit_op_flags. */
+                       inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* Only for cumulative operations. */
+       if (dst == src2 && dstw == src2w) {
+               if (src1 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+#else
+                       if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
+#endif
+                               BINARY_EAX_IMM(op_eax_imm, src1w);
+                       }
+                       else {
+                               BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
+                       }
+               }
+               else if (FAST_IS_REG(dst)) {
+                       inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               else if (FAST_IS_REG(src1)) {
+                       inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* General version. */
+       if (FAST_IS_REG(dst)) {
+               EMIT_MOV(compiler, dst, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+       }
+       else {
+               /* This version requires fewer memory writes. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
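+/* Same as emit_cum_binary, but for non-commutative operations (SUB,
+   SBB): the operands cannot be swapped, so the dst == src2 shortcut is
+   omitted and the general path must not clobber src2. */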
+static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
+       sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+
+       if (dst == SLJIT_UNUSED) {
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (dst == src1 && dstw == src1w) {
+               if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+                       if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
+#endif
+                               BINARY_EAX_IMM(op_eax_imm, src2w);
+                       }
+                       else {
+                               BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
+                       }
+               }
+               else if (FAST_IS_REG(dst)) {
+                       inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               else if (FAST_IS_REG(src2)) {
+                       inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst = op_mr;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* General version. */
+       if (FAST_IS_REG(dst) && dst != src2) {
+               EMIT_MOV(compiler, dst, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+       }
+       else {
+               /* This version requires fewer memory writes. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = op_rm;
+               }
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
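+/* Signed multiplication. Three IMUL encodings are used: the two operand
+   reg,r/m form (0F AF) and the three operand forms taking an imm8 or
+   imm32. On x86-64, immediates that do not fit in 32 bits are loaded
+   into TMP_REG2 first. */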
+static sljit_si emit_mul(struct sljit_compiler *compiler,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+       sljit_si dst_r;
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       /* Register destination. */
+       if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = IMUL_r_rm;
+       }
+       else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = IMUL_r_rm;
+       }
+       else if (src1 & SLJIT_IMM) {
+               if (src2 & SLJIT_IMM) {
+                       EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
+                       src2 = dst_r;
+                       src2w = 0;
+               }
+
+               if (src1w <= 127 && src1w >= -128) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i8;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       *inst = (sljit_sb)src1w;
+               }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_sw*)inst = src1w;
+               }
+#else
+               else if (IS_HALFWORD(src1w)) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_si*)inst = (sljit_si)src1w;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
+                       if (dst_r != src2)
+                               EMIT_MOV(compiler, dst_r, 0, src2, src2w);
+                       inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+                       FAIL_IF(!inst);
+                       *inst++ = GROUP_0F;
+                       *inst = IMUL_r_rm;
+               }
+#endif
+       }
+       else if (src2 & SLJIT_IMM) {
+               /* Note: src1 is NOT immediate. */
+
+               if (src2w <= 127 && src2w >= -128) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i8;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1);
+                       *inst = (sljit_sb)src2w;
+               }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               else {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_sw*)inst = src2w;
+               }
+#else
+               else if (IS_HALFWORD(src2w)) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = IMUL_r_rm_i32;
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                       FAIL_IF(!inst);
+                       INC_SIZE(4);
+                       *(sljit_si*)inst = (sljit_si)src2w;
+               }
+               else {
+                       EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
+                       if (dst_r != src1)
+                               EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+                       FAIL_IF(!inst);
+                       *inst++ = GROUP_0F;
+                       *inst = IMUL_r_rm;
+               }
+#endif
+       }
+       else {
+               /* Neither argument is immediate. */
+               if (ADDRESSING_DEPENDS_ON(src2, dst_r))
+                       dst_r = TMP_REG1;
+               EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+               inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_0F;
+               *inst = IMUL_r_rm;
+       }
+
+       if (dst_r == TMP_REG1)
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+
+       return SLJIT_SUCCESS;
+}
+
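+/* Tries to encode an addition as LEA, which leaves the flags untouched.
+   Returns SLJIT_ERR_UNSUPPORTED when no suitable addressing form
+   exists, in which case the caller falls back to a plain ADD. */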
+static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+       sljit_si dst_r, done = 0;
+
+       /* These cases are better left to be handled the normal way. */
+       if (!keep_flags) {
+               if (dst == src1 && dstw == src1w)
+                       return SLJIT_ERR_UNSUPPORTED;
+               if (dst == src2 && dstw == src2w)
+                       return SLJIT_ERR_UNSUPPORTED;
+       }
+
+       dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+       if (FAST_IS_REG(src1)) {
+               if (FAST_IS_REG(src2)) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+                       done = 1;
+               }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
+#else
+               if (src2 & SLJIT_IMM) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
+#endif
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+                       done = 1;
+               }
+       }
+       else if (FAST_IS_REG(src2)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
+#else
+               if (src1 & SLJIT_IMM) {
+                       inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
+#endif
+                       FAIL_IF(!inst);
+                       *inst = LEA_r_m;
+                       done = 1;
+               }
+       }
+
+       if (done) {
+               if (dst_r == TMP_REG1)
+                       return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+               return SLJIT_SUCCESS;
+       }
+       return SLJIT_ERR_UNSUPPORTED;
+}
+
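+/* Emits CMP. Since only the flags are produced, no destination handling
+   is needed; the short CMP EAX,imm32 form is used when possible. */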
+static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+       if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+#endif
+               BINARY_EAX_IMM(CMP_EAX_i32, src2w);
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src1)) {
+               if (src2 & SLJIT_IMM) {
+                       BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = CMP_r_rm;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
+               inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
+               FAIL_IF(!inst);
+               *inst = CMP_rm_r;
+               return SLJIT_SUCCESS;
+       }
+
+       if (src2 & SLJIT_IMM) {
+               if (src1 & SLJIT_IMM) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       src1 = TMP_REG1;
+                       src1w = 0;
+               }
+               BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
+       }
+       else {
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst = CMP_r_rm;
+       }
+       return SLJIT_SUCCESS;
+}
+
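+/* Emits TEST (non-destructive AND, flags only). Immediate operands use
+   the F7 group form; on x86-64, immediates wider than 32 bits must be
+   loaded into TMP_REG2 first. */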
+static sljit_si emit_test_binary(struct sljit_compiler *compiler,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+       if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+#endif
+               BINARY_EAX_IMM(TEST_EAX_i32, src2w);
+               return SLJIT_SUCCESS;
+       }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+#else
+       if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
+#endif
+               BINARY_EAX_IMM(TEST_EAX_i32, src1w);
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src1)) {
+               if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if (IS_HALFWORD(src2w) || compiler->mode32) {
+                               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
+                               FAIL_IF(!inst);
+                               *inst = GROUP_F7;
+                       }
+                       else {
+                               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
+                               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
+                               FAIL_IF(!inst);
+                               *inst = TEST_rm_r;
+                       }
+#else
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
+                       FAIL_IF(!inst);
+                       *inst = GROUP_F7;
+#endif
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
+                       FAIL_IF(!inst);
+                       *inst = TEST_rm_r;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       if (FAST_IS_REG(src2)) {
+               if (src1 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+                       if (IS_HALFWORD(src1w) || compiler->mode32) {
+                               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
+                               FAIL_IF(!inst);
+                               *inst = GROUP_F7;
+                       }
+                       else {
+                               FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
+                               inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
+                               FAIL_IF(!inst);
+                               *inst = TEST_rm_r;
+                       }
+#else
+                       inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
+                       FAIL_IF(!inst);
+                       *inst = GROUP_F7;
+#endif
+               }
+               else {
+                       inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
+                       FAIL_IF(!inst);
+                       *inst = TEST_rm_r;
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+       if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if (IS_HALFWORD(src2w) || compiler->mode32) {
+                       inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst = GROUP_F7;
+               }
+               else {
+                       FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
+                       inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst = TEST_rm_r;
+               }
+#else
+               inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst = GROUP_F7;
+#endif
+       }
+       else {
+               inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
+               FAIL_IF(!inst);
+               *inst = TEST_rm_r;
+       }
+       return SLJIT_SUCCESS;
+}
+
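+/* Emits a shift. A variable shift count must be in CL
+   (SLJIT_PREF_SHIFT_REG is ecx), so when ecx is live or used for
+   addressing it is saved and restored through TMP_REG2 (x86-64) or a
+   stack slot (x86-32). */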
+static sljit_si emit_shift(struct sljit_compiler *compiler,
+       sljit_ub mode,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_ub* inst;
+
+       if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
+               if (dst == src1 && dstw == src1w) {
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       return SLJIT_SUCCESS;
+               }
+               if (dst == SLJIT_UNUSED) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       return SLJIT_SUCCESS;
+               }
+               if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+                       return SLJIT_SUCCESS;
+               }
+               if (FAST_IS_REG(dst)) {
+                       EMIT_MOV(compiler, dst, 0, src1, src1w);
+                       inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
+                       FAIL_IF(!inst);
+                       *inst |= mode;
+                       return SLJIT_SUCCESS;
+               }
+
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+               return SLJIT_SUCCESS;
+       }
+
+       if (dst == SLJIT_PREF_SHIFT_REG) {
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+       }
+       else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
+               if (src1 != dst)
+                       EMIT_MOV(compiler, dst, 0, src1, src1w);
+               EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+       }
+       else {
+               /* This case is really difficult, since ecx itself may be used
+                  for addressing, and we must ensure it works even then. */
+               EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
+#else
+               /* [esp+0] contains the flags. */
+               EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
+#endif
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+               inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+               FAIL_IF(!inst);
+               *inst |= mode;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
+#else
+               EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
+#endif
+               EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
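+/* Wraps emit_shift for callers that may need the flags: a constant zero
+   count degenerates to a plain move (or an OR with 0 when flags are
+   requested), and other cases get an extra CMP, since a shift by zero
+   cannot be relied on to produce flags. */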
+static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
+       sljit_ub mode, sljit_si set_flags,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       /* The CPU does not set flags if the shift count is 0. */
+       if (src2 & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
+                       return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
+#else
+               if ((src2w & 0x1f) != 0)
+                       return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
+#endif
+               if (!set_flags)
+                       return emit_mov(compiler, dst, dstw, src1, src1w);
+               /* OR dst, src, 0 */
+               return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
+                       dst, dstw, src1, src1w, SLJIT_IMM, 0);
+       }
+
+       if (!set_flags)
+               return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
+
+       if (!FAST_IS_REG(dst))
+               FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
+
+       FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
+
+       if (FAST_IS_REG(dst))
+               return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
+       return SLJIT_SUCCESS;
+}
+
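+/* Dispatcher for two-operand integer operations. Flag-less ADD/SUB may
+   be turned into LEA, AND with an unused destination becomes TEST, and
+   SUB with an unused destination becomes CMP; EFLAGS is saved and
+   restored around the operation when SLJIT_KEEP_FLAGS is set. */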
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+       CHECK_EXTRA_REGS(src1, src1w, (void)0);
+       CHECK_EXTRA_REGS(src2, src2w, (void)0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = op & SLJIT_INT_OP;
+#endif
+
+       if (GET_OPCODE(op) >= SLJIT_MUL) {
+               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
+                       compiler->flags_saved = 0;
+               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_ADD:
+               if (!GET_FLAGS(op)) {
+                       if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
+                               return compiler->error;
+               }
+               else
+                       compiler->flags_saved = 0;
+               if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+               return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_ADDC:
+               if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
+                       FAIL_IF(emit_restore_flags(compiler, 1));
+               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
+                       FAIL_IF(emit_save_flags(compiler));
+               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
+                       compiler->flags_saved = 0;
+               return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_SUB:
+               if (!GET_FLAGS(op)) {
+                       if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
+                               return compiler->error;
+               }
+               else
+                       compiler->flags_saved = 0;
+               if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+                       FAIL_IF(emit_save_flags(compiler));
+               if (dst == SLJIT_UNUSED)
+                       return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
+               return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_SUBC:
+               if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
+                       FAIL_IF(emit_restore_flags(compiler, 1));
+               else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
+                       FAIL_IF(emit_save_flags(compiler));
+               if (SLJIT_UNLIKELY(GET_FLAGS(op)))
+                       compiler->flags_saved = 0;
+               return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_MUL:
+               return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_AND:
+               if (dst == SLJIT_UNUSED)
+                       return emit_test_binary(compiler, src1, src1w, src2, src2w);
+               return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_OR:
+               return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_XOR:
+               return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_SHL:
+               return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_LSHR:
+               return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
+                       dst, dstw, src1, src1w, src2, src2w);
+       case SLJIT_ASHR:
+               return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
+                       dst, dstw, src1, src1w, src2, src2w);
+       }
+
+       return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
+               return -1;
+#endif
+       return reg_map[reg];
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
+{
+       CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+       return reg;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+       void *instruction, sljit_si size)
+{
+       sljit_ub *inst;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+       FAIL_IF(!inst);
+       INC_SIZE(size);
+       SLJIT_MEMMOVE(inst, instruction, size);
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+/* Alignment + 4 * 16 bytes. */
+static sljit_si sse2_data[3 + (4 + 4) * 2];
+static sljit_si *sse2_buffer;
+
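+/* sse2_buffer points at a 16 byte aligned constant pool inside
+   sse2_data: offsets 0 and 16 hold the single precision sign and abs
+   masks, offsets 32 and 48 the double precision ones. They serve as
+   memory operands for the XORPD (negate) and ANDPD (abs) logic below. */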
+static void init_compiler(void)
+{
+       sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
+       /* Single precision constants. */
+       sse2_buffer[0] = 0x80000000;
+       sse2_buffer[4] = 0x7fffffff;
+       /* Double precision constants. */
+       sse2_buffer[8] = 0;
+       sse2_buffer[9] = 0x80000000;
+       sse2_buffer[12] = 0xffffffff;
+       sse2_buffer[13] = 0x7fffffff;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
+{
+#ifdef SLJIT_IS_FPU_AVAILABLE
+       return SLJIT_IS_FPU_AVAILABLE;
+#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+       if (cpu_has_sse2 == -1)
+               get_cpu_features();
+       return cpu_has_sse2;
+#else /* SLJIT_DETECT_SSE2 */
+       return 1;
+#endif /* SLJIT_DETECT_SSE2 */
+}
+
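+/* Generic scalar SSE2 arithmetic: the F3 prefix selects the single
+   precision form and F2 the double precision form of the given opcode.
+   emit_sse2_logic below emits the packed logic ops, where a 66 prefix
+   selects the double precision variant. */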
+static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
+       sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
+{
+       sljit_ub *inst;
+
+       inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst = opcode;
+       return SLJIT_SUCCESS;
+}
+
+static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
+       sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
+{
+       sljit_ub *inst;
+
+       inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst = opcode;
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
+       sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
+{
+       return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
+}
+
+static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
+       sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
+{
+       return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
+}
+
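+/* Float to integer conversion using the truncating CVTTSD2SI (double)
+   or CVTTSS2SI (single) encoding, selected by the F2/F3 prefix. */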
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+       sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
+               compiler->mode32 = 0;
+#endif
+
+       inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst = CVTTSD2SI_r_xm;
+
+       if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+               return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+       return SLJIT_SUCCESS;
+}
+
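+/* Integer to float conversion via CVTSI2SD / CVTSI2SS. Immediate
+   sources are first moved into TMP_REG1, since the instruction needs a
+   register or memory source operand. */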
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+       sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
+               compiler->mode32 = 0;
+#endif
+
+       if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+                       srcw = (sljit_si)srcw;
+#endif
+               EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+               src = TMP_REG1;
+               srcw = 0;
+       }
+
+       inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
+       FAIL_IF(!inst);
+       *inst++ = GROUP_0F;
+       *inst = CVTSI2SD_x_rm;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 1;
+#endif
+       if (dst_r == TMP_FREG)
+               return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+       return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       compiler->flags_saved = 0;
+       if (!FAST_IS_REG(src1)) {
+               FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+               src1 = TMP_FREG;
+       }
+       return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
+}
+
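+/* Single-operand floating point entry point: handles moves, the
+   double/single conversions, and DNEG/DABS via the sign-mask constants
+   prepared in init_compiler. */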
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw)
+{
+       sljit_si dst_r;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 1;
+#endif
+
+       CHECK_ERROR();
+       SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+       if (GET_OPCODE(op) == SLJIT_DMOV) {
+               if (FAST_IS_REG(dst))
+                       return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
+               if (FAST_IS_REG(src))
+                       return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
+               FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
+               return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+       }
+
+       if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
+               dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+               if (FAST_IS_REG(src)) {
+                       /* We overwrite the high bits of the source. From SLJIT's point of view,
+                          this is not an issue.
+                          Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
+                       FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
+               }
+               else {
+                       FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
+                       src = TMP_FREG;
+               }
+
+               FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
+               if (dst_r == TMP_FREG)
+                       return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+               return SLJIT_SUCCESS;
+       }
+
+       if (SLOW_IS_REG(dst)) {
+               dst_r = dst;
+               if (dst != src)
+                       FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
+       }
+       else {
+               dst_r = TMP_FREG;
+               FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DNEG:
+               FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
+               break;
+
+       case SLJIT_DABS:
+               FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
+               break;
+       }
+
+       if (dst_r == TMP_FREG)
+               return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+       return SLJIT_SUCCESS;
+}
+
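+/* Two-operand floating point arithmetic. When dst aliases src2, the
+   commutative DADD/DMUL simply swap their sources; otherwise the left
+   operand is loaded into dst or TMP_FREG before the SSE2 op. */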
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src1, sljit_sw src1w,
+       sljit_si src2, sljit_sw src2w)
+{
+       sljit_si dst_r;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       ADJUST_LOCAL_OFFSET(src1, src1w);
+       ADJUST_LOCAL_OFFSET(src2, src2w);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 1;
+#endif
+
+       if (FAST_IS_REG(dst)) {
+               dst_r = dst;
+               if (dst == src1)
+                       ; /* Do nothing here. */
+               else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) {
+                       /* Swap arguments. */
+                       src2 = src1;
+                       src2w = src1w;
+               }
+               else if (dst != src2)
+                       FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
+               else {
+                       dst_r = TMP_FREG;
+                       FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+               }
+       }
+       else {
+               dst_r = TMP_FREG;
+               FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+       }
+
+       switch (GET_OPCODE(op)) {
+       case SLJIT_DADD:
+               FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+
+       case SLJIT_DSUB:
+               FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+
+       case SLJIT_DMUL:
+               FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+
+       case SLJIT_DDIV:
+               FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
+               break;
+       }
+
+       if (dst_r == TMP_FREG)
+               return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+       return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
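+/* Labels are recorded in the instruction stream as a two byte (0, 0)
+   marker and resolved during code generation; a label emitted at the
+   same offset as the previous one reuses that label. */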
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+       sljit_ub *inst;
+       struct sljit_label *label;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_label(compiler));
+
+       /* We should restore the flags before the label,
+          since other incoming jumps have their own flags as well. */
+       if (SLJIT_UNLIKELY(compiler->flags_saved))
+               PTR_FAIL_IF(emit_restore_flags(compiler, 0));
+
+       if (compiler->last_label && compiler->last_label->size == compiler->size)
+               return compiler->last_label;
+
+       label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+       PTR_FAIL_IF(!label);
+       set_label(label, compiler);
+
+       inst = (sljit_ub*)ensure_buf(compiler, 2);
+       PTR_FAIL_IF(!inst);
+
+       *inst++ = 0;
+       *inst++ = 0;
+
+       return label;
+}
+
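+/* Jumps are likewise recorded as a two byte marker (0 followed by
+   type + 4) and encoded later; only the worst case instruction size is
+   reserved here. Calls first marshal their arguments via call_with_args. */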
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
+{
+       sljit_ub *inst;
+       struct sljit_jump *jump;
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+       if (SLJIT_UNLIKELY(compiler->flags_saved)) {
+               if ((type & 0xff) <= SLJIT_JUMP)
+                       PTR_FAIL_IF(emit_restore_flags(compiler, 0));
+               compiler->flags_saved = 0;
+       }
+
+       jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+       PTR_FAIL_IF_NULL(jump);
+       set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+       type &= 0xff;
+
+       if (type >= SLJIT_CALL1)
+               PTR_FAIL_IF(call_with_args(compiler, type));
+
+       /* Worst case size. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
+#else
+       compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
+#endif
+
+       inst = (sljit_ub*)ensure_buf(compiler, 2);
+       PTR_FAIL_IF_NULL(inst);
+
+       *inst++ = 0;
+       *inst++ = type + 4;
+       return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
+{
+       sljit_ub *inst;
+       struct sljit_jump *jump;
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+       ADJUST_LOCAL_OFFSET(src, srcw);
+
+       CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+       if (SLJIT_UNLIKELY(compiler->flags_saved)) {
+               if (type <= SLJIT_JUMP)
+                       FAIL_IF(emit_restore_flags(compiler, 0));
+               compiler->flags_saved = 0;
+       }
+
+       if (type >= SLJIT_CALL1) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+               if (src == SLJIT_R2) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+                       src = TMP_REG1;
+               }
+               if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
+                       srcw += sizeof(sljit_sw);
+#endif
+#endif
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
+               if (src == SLJIT_R2) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+                       src = TMP_REG1;
+               }
+#endif
+               FAIL_IF(call_with_args(compiler, type));
+       }
+
+       if (src == SLJIT_IMM) {
+               jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+               FAIL_IF_NULL(jump);
+               set_jump(jump, compiler, JUMP_ADDR);
+               jump->u.target = srcw;
+
+               /* Worst case size. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+               compiler->size += 5;
+#else
+               compiler->size += 10 + 3;
+#endif
+
+               inst = (sljit_ub*)ensure_buf(compiler, 2);
+               FAIL_IF_NULL(inst);
+
+               *inst++ = 0;
+               *inst++ = type + 4;
+       }
+       else {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+               /* REX_W is not necessary (src is not immediate). */
+               compiler->mode32 = 1;
+#endif
+               inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
+               FAIL_IF(!inst);
+               *inst++ = GROUP_FF;
+               *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
+       }
+       return SLJIT_SUCCESS;
+}
+
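+/* Materializes the current condition flags as 0 or 1 in dst
+   (setcc opcode = matching jcc opcode + 0x10). SETcc can only write a
+   byte register, so on x86-32 destinations without an encodable low
+   byte go through eax with XCHG, or use CMOV when available. */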
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
+       sljit_si dst, sljit_sw dstw,
+       sljit_si src, sljit_sw srcw,
+       sljit_si type)
+{
+       sljit_ub *inst;
+       sljit_ub cond_set = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si reg;
+#else
+       /* CHECK_EXTRA_REGS might overwrite these values. */
+       sljit_si dst_save = dst;
+       sljit_sw dstw_save = dstw;
+#endif
+
+       CHECK_ERROR();
+       CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+       SLJIT_UNUSED_ARG(srcw);
+
+       if (dst == SLJIT_UNUSED)
+               return SLJIT_SUCCESS;
+
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+       if (SLJIT_UNLIKELY(compiler->flags_saved))
+               FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
+
+       type &= 0xff;
+       /* setcc = jcc + 0x10. */
+       cond_set = get_jump_code(type) + 0x10;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
+               FAIL_IF(!inst);
+               INC_SIZE(4 + 3);
+               /* Set low register to conditional flag. */
+               *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
+               *inst++ = GROUP_0F;
+               *inst++ = cond_set;
+               *inst++ = MOD_REG | reg_lmap[TMP_REG1];
+               *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
+               *inst++ = OR_rm8_r8;
+               *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
+               return SLJIT_SUCCESS;
+       }
+
+       reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
+
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
+       FAIL_IF(!inst);
+       INC_SIZE(4 + 4);
+       /* Set low register to conditional flag. */
+       *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
+       *inst++ = GROUP_0F;
+       *inst++ = cond_set;
+       *inst++ = MOD_REG | reg_lmap[reg];
+       *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
+       *inst++ = GROUP_0F;
+       *inst++ = MOVZX_r_rm8;
+       *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
+
+       if (reg != TMP_REG1)
+               return SLJIT_SUCCESS;
+
+       if (GET_OPCODE(op) < SLJIT_ADD) {
+               compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
+               return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+       }
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
+#else /* SLJIT_CONFIG_X86_64 */
+       if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
+               if (reg_map[dst] <= 4) {
+                       /* Low byte is accessible. */
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
+                       FAIL_IF(!inst);
+                       INC_SIZE(3 + 3);
+                       /* Set low byte to conditional flag. */
+                       *inst++ = GROUP_0F;
+                       *inst++ = cond_set;
+                       *inst++ = MOD_REG | reg_map[dst];
+
+                       *inst++ = GROUP_0F;
+                       *inst++ = MOVZX_r_rm8;
+                       *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
+                       return SLJIT_SUCCESS;
+               }
+
+               /* Low byte is not accessible. */
+               if (cpu_has_cmov == -1)
+                       get_cpu_features();
+
+               if (cpu_has_cmov) {
+                       EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
+                       /* An xor reg, reg operation would overwrite the flags. */
+                       EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
+
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
+                       FAIL_IF(!inst);
+                       INC_SIZE(3);
+
+                       *inst++ = GROUP_0F;
+                       /* cmovcc = setcc - 0x50. */
+                       *inst++ = cond_set - 0x50;
+                       *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
+                       return SLJIT_SUCCESS;
+               }
+
+               inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
+               FAIL_IF(!inst);
+               INC_SIZE(1 + 3 + 3 + 1);
+               *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+               /* Set al to conditional flag. */
+               *inst++ = GROUP_0F;
+               *inst++ = cond_set;
+               *inst++ = MOD_REG | 0 /* eax */;
+
+               *inst++ = GROUP_0F;
+               *inst++ = MOVZX_r_rm8;
+               *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
+               *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+               return SLJIT_SUCCESS;
+       }
+
+       if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
+               SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
+               if (dst != SLJIT_R0) {
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
+                       FAIL_IF(!inst);
+                       INC_SIZE(1 + 3 + 2 + 1);
+                       /* Set low register to conditional flag. */
+                       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+                       *inst++ = GROUP_0F;
+                       *inst++ = cond_set;
+                       *inst++ = MOD_REG | 0 /* eax */;
+                       *inst++ = OR_rm8_r8;
+                       *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
+                       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+               }
+               else {
+                       inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
+                       FAIL_IF(!inst);
+                       INC_SIZE(2 + 3 + 2 + 2);
+                       /* Set low register to conditional flag. */
+                       *inst++ = XCHG_r_rm;
+                       *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
+                       *inst++ = GROUP_0F;
+                       *inst++ = cond_set;
+                       *inst++ = MOD_REG | 1 /* ecx */;
+                       *inst++ = OR_rm8_r8;
+                       *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
+                       *inst++ = XCHG_r_rm;
+                       *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
+               }
+               return SLJIT_SUCCESS;
+       }
+
+       /* Set TMP_REG1 to the bit. */
+       inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
+       FAIL_IF(!inst);
+       INC_SIZE(1 + 3 + 3 + 1);
+       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+       /* Set al to conditional flag. */
+       *inst++ = GROUP_0F;
+       *inst++ = cond_set;
+       *inst++ = MOD_REG | 0 /* eax */;
+
+       *inst++ = GROUP_0F;
+       *inst++ = MOVZX_r_rm8;
+       *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
+
+       *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
+
+       if (GET_OPCODE(op) < SLJIT_ADD)
+               return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+               || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+       compiler->skip_checks = 1;
+#endif
+       return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_64 */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+{
+       CHECK_ERROR();
+       CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+#endif
+
+       ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (NOT_HALFWORD(offset)) {
+               FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+               SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
+               return compiler->error;
+#else
+               return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
+#endif
+       }
+#endif
+
+       if (offset != 0)
+               return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+       return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
+}
+
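+/* Emits a load of init_value whose immediate field can be rewritten
+   later through sljit_set_const; the (0, 1) marker records the patch
+   site in the instruction stream. */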
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+{
+       sljit_ub *inst;
+       struct sljit_const *const_;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       sljit_si reg;
+#endif
+
+       CHECK_ERROR_PTR();
+       CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+       ADJUST_LOCAL_OFFSET(dst, dstw);
+
+       CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+       const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+       PTR_FAIL_IF(!const_);
+       set_const(const_, compiler);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       compiler->mode32 = 0;
+       reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+
+       if (emit_load_imm64(compiler, reg, init_value))
+               return NULL;
+#else
+       if (dst == SLJIT_UNUSED)
+               dst = TMP_REG1;
+
+       if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
+               return NULL;
+#endif
+
+       inst = (sljit_ub*)ensure_buf(compiler, 2);
+       PTR_FAIL_IF(!inst);
+
+       *inst++ = 0;
+       *inst++ = 1;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+       if (dst & SLJIT_MEM)
+               if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
+                       return NULL;
+#endif
+
+       return const_;
+}
+
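+/* Runtime patching of a rewritable jump: x86-32 jumps store a rel32
+   displacement, x86-64 jumps an absolute 64 bit address. */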
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+       *(sljit_sw*)addr = new_addr - (addr + 4);
+#else
+       *(sljit_uw*)addr = new_addr;
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+{
+       *(sljit_sw*)addr = new_constant;
+}
diff --git a/ext/pcre/pcrelib/sljit/sljitUtils.c b/ext/pcre/pcrelib/sljit/sljitUtils.c
new file mode 100644 (file)
index 0000000..5294b5f
--- /dev/null
@@ -0,0 +1,334 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ------------------------------------------------------------------------ */
+/*  Locks                                                                   */
+/* ------------------------------------------------------------------------ */
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) || (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+static SLJIT_INLINE void allocator_grab_lock(void)
+{
+       /* Always successful. */
+}
+
+static SLJIT_INLINE void allocator_release_lock(void)
+{
+       /* Always successful. */
+}
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+{
+       /* Always successful. */
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+{
+       /* Always successful. */
+}
+
+#endif /* SLJIT_UTIL_GLOBAL_LOCK */
+
+#elif defined(_WIN32) /* SLJIT_SINGLE_THREADED */
+
+#include "windows.h"
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+static HANDLE allocator_mutex = 0;
+
+static SLJIT_INLINE void allocator_grab_lock(void)
+{
+       /* No idea what to do if an error occurs. Static mutexes should never fail... */
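+       /* CreateMutex with bInitialOwner=TRUE leaves the first caller holding
+          the lock, matching the WaitForSingleObject taken on later calls. */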
+       if (!allocator_mutex)
+               allocator_mutex = CreateMutex(NULL, TRUE, NULL);
+       else
+               WaitForSingleObject(allocator_mutex, INFINITE);
+}
+
+static SLJIT_INLINE void allocator_release_lock(void)
+{
+       ReleaseMutex(allocator_mutex);
+}
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+static HANDLE global_mutex = 0;
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+{
+       /* No idea what to do if an error occurs. Static mutexes should never fail... */
+       if (!global_mutex)
+               global_mutex = CreateMutex(NULL, TRUE, NULL);
+       else
+               WaitForSingleObject(global_mutex, INFINITE);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+{
+       ReleaseMutex(global_mutex);
+}
+
+#endif /* SLJIT_UTIL_GLOBAL_LOCK */
+
+#else /* _WIN32 */
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+#include <pthread.h>
+
+static pthread_mutex_t allocator_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static SLJIT_INLINE void allocator_grab_lock(void)
+{
+       pthread_mutex_lock(&allocator_mutex);
+}
+
+static SLJIT_INLINE void allocator_release_lock(void)
+{
+       pthread_mutex_unlock(&allocator_mutex);
+}
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+#include <pthread.h>
+
+static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+{
+       pthread_mutex_lock(&global_mutex);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+{
+       pthread_mutex_unlock(&global_mutex);
+}
+
+#endif /* SLJIT_UTIL_GLOBAL_LOCK */
+
+#endif /* _WIN32 */
+
+/* ------------------------------------------------------------------------ */
+/*  Stack                                                                   */
+/* ------------------------------------------------------------------------ */
+
+#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+#ifdef _WIN32
+#include "windows.h"
+#else
+/* Provides mmap function. */
+#include <sys/mman.h>
+/* For detecting the page size. */
+#include <unistd.h>
+
+#ifndef MAP_ANON
+
+#include <fcntl.h>
+
+/* Some old systems do not have MAP_ANON; map /dev/zero instead to obtain zero-filled pages. */
+static sljit_si dev_zero = -1;
+
+#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
+
+static SLJIT_INLINE sljit_si open_dev_zero(void)
+{
+       dev_zero = open("/dev/zero", O_RDWR);
+       return dev_zero < 0;
+}
+
+#else /* SLJIT_SINGLE_THREADED */
+
+#include <pthread.h>
+
+static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static SLJIT_INLINE sljit_si open_dev_zero(void)
+{
+       pthread_mutex_lock(&dev_zero_mutex);
+       dev_zero = open("/dev/zero", O_RDWR);
+       pthread_mutex_unlock(&dev_zero_mutex);
+       return dev_zero < 0;
+}
+
+#endif /* SLJIT_SINGLE_THREADED */
+
+#endif /* MAP_ANON */
+
+#endif /* _WIN32 */
+
+#endif /* SLJIT_UTIL_STACK || SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
+
+/* Page-alignment mask (page size - 1), computed lazily; may be made smarter in the future. */
+static sljit_sw sljit_page_align = 0;
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data)
+{
+       struct sljit_stack *stack;
+       union {
+               void *ptr;
+               sljit_uw uw;
+       } base;
+#ifdef _WIN32
+       SYSTEM_INFO si;
+#endif
+
+       SLJIT_UNUSED_ARG(allocator_data);
+       if (limit > max_limit || limit < 1)
+               return NULL;
+
+#ifdef _WIN32
+       if (!sljit_page_align) {
+               GetSystemInfo(&si);
+               sljit_page_align = si.dwPageSize - 1;
+       }
+#else
+       if (!sljit_page_align) {
+               sljit_page_align = sysconf(_SC_PAGESIZE);
+               /* Should never happen. */
+               if (sljit_page_align < 0)
+                       sljit_page_align = 4096;
+               sljit_page_align--;
+       }
+#endif
+
+       /* Round max_limit up to a whole number of pages. */
+       max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
+
+       stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data);
+       if (!stack)
+               return NULL;
+
+#ifdef _WIN32
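+       /* Reserve the whole address range up front; pages are committed on
+          demand by sljit_stack_resize below. */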
+       base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE);
+       if (!base.ptr) {
+               SLJIT_FREE(stack, allocator_data);
+               return NULL;
+       }
+       stack->base = base.uw;
+       stack->limit = stack->base;
+       stack->max_limit = stack->base + max_limit;
+       if (sljit_stack_resize(stack, stack->base + limit)) {
+               sljit_free_stack(stack, allocator_data);
+               return NULL;
+       }
+#else
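+       /* POSIX: map the full range immediately; physical pages are faulted
+          in only when first touched. */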
+#ifdef MAP_ANON
+       base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+#else
+       if (dev_zero < 0) {
+               if (open_dev_zero()) {
+                       SLJIT_FREE(stack, allocator_data);
+                       return NULL;
+               }
+       }
+       base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
+#endif
+       if (base.ptr == MAP_FAILED) {
+               SLJIT_FREE(stack, allocator_data);
+               return NULL;
+       }
+       stack->base = base.uw;
+       stack->limit = stack->base + limit;
+       stack->max_limit = stack->base + max_limit;
+#endif
+       stack->top = stack->base;
+       return stack;
+}
+
+#undef PAGE_ALIGN
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack, void *allocator_data)
+{
+       SLJIT_UNUSED_ARG(allocator_data);
+#ifdef _WIN32
+       VirtualFree((void*)stack->base, 0, MEM_RELEASE);
+#else
+       munmap((void*)stack->base, stack->max_limit - stack->base);
+#endif
+       SLJIT_FREE(stack, allocator_data);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit)
+{
+       sljit_uw aligned_old_limit;
+       sljit_uw aligned_new_limit;
+
+       if ((new_limit > stack->max_limit) || (new_limit < stack->base))
+               return -1;
+#ifdef _WIN32
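+       /* Commit or decommit whole pages between the old and the new limit. */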
+       aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
+       aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+       if (aligned_new_limit != aligned_old_limit) {
+               if (aligned_new_limit > aligned_old_limit) {
+                       if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE))
+                               return -1;
+               }
+               else {
+                       if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT))
+                               return -1;
+               }
+       }
+       stack->limit = new_limit;
+       return 0;
+#else
+       if (new_limit >= stack->limit) {
+               stack->limit = new_limit;
+               return 0;
+       }
+       aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
+       aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+       /* If madvise is available, we release the unnecessary space. */
+#if defined(MADV_DONTNEED)
+       if (aligned_new_limit < aligned_old_limit)
+               madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED);
+#elif defined(POSIX_MADV_DONTNEED)
+       if (aligned_new_limit < aligned_old_limit)
+               posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED);
+#endif
+       stack->limit = new_limit;
+       return 0;
+#endif
+}
+
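+/* Example usage (an illustrative sketch, not part of the library):
+ *
+ *     struct sljit_stack *stack = sljit_allocate_stack(8192, 65536, NULL);
+ *     if (stack && !sljit_stack_resize(stack, stack->base + 16384)) {
+ *             ... use [stack->base, stack->limit) as a runtime stack ...
+ *     }
+ *     if (stack)
+ *             sljit_free_stack(stack, NULL);
+ */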
+#endif /* SLJIT_UTIL_STACK */
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR || SLJIT_UTIL_GLOBAL_LOCK */