bison -W -d parser.y -v --report-file=parser.info
parser.tab.h: parser.tab.c
-parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c jv.c jv_parse.c jv_print.c jv_dtoa.c
+parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c jv.c jv_parse.c jv_print.c jv_dtoa.c jv_unicode.c
$(CC) -o $@ $^
jv_test: jv_test.c jv.c jv_print.c jv_dtoa.c
#include "jv.h"
#include "jv_dtoa.h"
#include "jv_parse.h"
+#include "jv_unicode.h"
typedef const char* presult;
return r;
}
-static int utf8_encode(int codepoint, char* out) {
- assert(codepoint >= 0 && codepoint <= 0x10FFFF);
- char* start = out;
- if (codepoint <= 0x7F) {
- *out++ = codepoint;
- } else if (codepoint <= 0x7FF) {
- *out++ = 0xC0 + ((codepoint & 0x7C0) >> 6);
- *out++ = 0x80 + ((codepoint & 0x03F));
- } else if(codepoint <= 0xFFFF) {
- *out++ = 0xE0 + ((codepoint & 0xF000) >> 12);
- *out++ = 0x80 + ((codepoint & 0x0FC0) >> 6);
- *out++ = 0x80 + ((codepoint & 0x003F));
- } else {
- *out++ = 0xF0 + ((codepoint & 0x1C0000) >> 18);
- *out++ = 0x80 + ((codepoint & 0x03F000) >> 12);
- *out++ = 0x80 + ((codepoint & 0x000FC0) >> 6);
- *out++ = 0x80 + ((codepoint & 0x00003F));
- }
- return out - start;
-}
-
static pfunc found_string(struct jv_parser* p) {
char* in = p->tokenbuf;
char* out = p->tokenbuf;
|(surrogate - 0xDC00));
}
// FIXME assert valid codepoint
- out += utf8_encode(codepoint, out);
+ out += jvp_utf8_encode(codepoint, out);
break;
default:
--- /dev/null
+#include <assert.h>
+#include "jv_unicode.h"
+int jvp_utf8_encode_length(int codepoint) { // Returns the number of bytes (1-4) that jvp_utf8_encode() writes for codepoint.
+ if (codepoint <= 0x7F) return 1; // fits in a single byte; no lower bound is checked, so negative values also return 1
+ else if (codepoint <= 0x7FF) return 2; // at most 11 significant bits
+ else if (codepoint <= 0xFFFF) return 3; // at most 16 significant bits
+ else return 4; // everything above 0xFFFF; the 0x10FFFF ceiling is not checked in this function
+}
+
+int jvp_utf8_encode(int codepoint, char* out) { // Writes codepoint as UTF-8 starting at out and returns the byte count; up to 4 bytes are stored and no terminator is appended.
+ assert(codepoint >= 0 && codepoint <= 0x10FFFF); // range precondition; surrogate values inside the range are not rejected here
+ char* start = out; // remembered so the number of bytes written can be computed at the end
+ if (codepoint <= 0x7F) {
+ *out++ = codepoint; // 1 byte: the value itself
+ } else if (codepoint <= 0x7FF) {
+ *out++ = 0xC0 + ((codepoint & 0x7C0) >> 6); // 2 bytes: lead 110xxxxx carries bits 10..6
+ *out++ = 0x80 + ((codepoint & 0x03F)); // continuation 10xxxxxx carries bits 5..0
+ } else if(codepoint <= 0xFFFF) {
+ *out++ = 0xE0 + ((codepoint & 0xF000) >> 12); // 3 bytes: lead 1110xxxx carries bits 15..12
+ *out++ = 0x80 + ((codepoint & 0x0FC0) >> 6); // continuation carries bits 11..6
+ *out++ = 0x80 + ((codepoint & 0x003F)); // continuation carries bits 5..0
+ } else {
+ *out++ = 0xF0 + ((codepoint & 0x1C0000) >> 18); // 4 bytes: lead 11110xxx carries bits 20..18
+ *out++ = 0x80 + ((codepoint & 0x03F000) >> 12); // continuation carries bits 17..12
+ *out++ = 0x80 + ((codepoint & 0x000FC0) >> 6); // continuation carries bits 11..6
+ *out++ = 0x80 + ((codepoint & 0x00003F)); // continuation carries bits 5..0
+ }
+ assert(out - start == jvp_utf8_encode_length(codepoint)); // cross-check against the length helper; assert() is compiled out when NDEBUG is defined
+ return out - start; // number of bytes stored at the caller's buffer
+}
--- /dev/null
+#ifndef JV_UNICODE_H // include guard for the UTF-8 helper declarations
+#define JV_UNICODE_H
+int jvp_utf8_decode_length(char startchar); // NOTE(review): no definition is visible in this change; presumably the sequence length implied by a UTF-8 lead byte - confirm and make sure it is defined before anything links against it
+
+int jvp_utf8_encode_length(int codepoint); // number of bytes (1-4) jvp_utf8_encode() produces for codepoint
+int jvp_utf8_encode(int codepoint, char* out); // stores the UTF-8 bytes for codepoint at out (up to 4, no terminator) and returns how many; asserts 0 <= codepoint <= 0x10FFFF
+#endif // JV_UNICODE_H