jq: Backport a bunch of commits in order to fix jq-1.6 terrible performance on startup (lilik-openwrt-22.03)
@ -0,0 +1,119 @@ | |||
From a949ffe9554b5af5614d31b795805f56939a031b Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Fri, 8 Feb 2019 16:52:04 -0500 | |||
Subject: [PATCH 1/9] Improve linking time by marking subtrees with unbound | |||
symbols | |||
--- | |||
src/compile.c | 29 +++++++++++++++++++++++++---- | |||
1 file changed, 25 insertions(+), 4 deletions(-) | |||
--- a/src/compile.c | |||
+++ b/src/compile.c | |||
@@ -49,9 +49,10 @@ struct inst { | |||
// Unbound instructions (references to other things that may or may not | |||
// exist) are created by "gen_foo_unbound", and bindings are created by | |||
// block_bind(definition, body), which binds all instructions in | |||
- // body which are unboudn and refer to "definition" by name. | |||
+ // body which are unbound and refer to "definition" by name. | |||
struct inst* bound_by; | |||
char* symbol; | |||
+ int any_unbound; | |||
int nformals; | |||
int nactuals; | |||
@@ -73,6 +74,7 @@ static inst* inst_new(opcode op) { | |||
i->bytecode_pos = -1; | |||
i->bound_by = 0; | |||
i->symbol = 0; | |||
+ i->any_unbound = 0; | |||
i->nformals = -1; | |||
i->nactuals = -1; | |||
i->subfn = gen_noop(); | |||
@@ -156,6 +158,7 @@ block gen_const_global(jv constant, cons | |||
inst* i = inst_new(STORE_GLOBAL); | |||
i->imm.constant = constant; | |||
i->symbol = strdup(name); | |||
+ i->any_unbound = 0; | |||
return inst_block(i); | |||
} | |||
@@ -211,6 +214,7 @@ block gen_op_unbound(opcode op, const ch | |||
assert(opcode_describe(op)->flags & OP_HAS_BINDING); | |||
inst* i = inst_new(op); | |||
i->symbol = strdup(name); | |||
+ i->any_unbound = 1; | |||
return inst_block(i); | |||
} | |||
@@ -224,6 +228,7 @@ block gen_op_bound(opcode op, block bind | |||
assert(block_is_single(binder)); | |||
block b = gen_op_unbound(op, binder.first->symbol); | |||
b.first->bound_by = binder.first; | |||
+ b.first->any_unbound = 0; | |||
return b; | |||
} | |||
@@ -324,7 +329,7 @@ static int block_count_refs(block binder | |||
return nrefs; | |||
} | |||
-static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) { | |||
+static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) { | |||
assert(block_is_single(binder)); | |||
assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD)); | |||
assert(binder.first->symbol); | |||
@@ -336,6 +341,9 @@ static int block_bind_subblock(block bin | |||
binder.first->nformals = block_count_formals(binder); | |||
int nrefs = 0; | |||
for (inst* i = body.first; i; i = i->next) { | |||
+ if (i->any_unbound == 0) | |||
+ continue; | |||
+ | |||
int flags = opcode_describe(i->op)->flags; | |||
if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 && | |||
(!strcmp(i->symbol, binder.first->symbol) || | |||
@@ -357,14 +365,25 @@ static int block_bind_subblock(block bin | |||
// a break whenever we come across a STOREV of *anonlabel... | |||
break_distance++; | |||
} | |||
+ | |||
+ i->any_unbound = (i->symbol && !i->bound_by); | |||
+ | |||
// binding recurses into closures | |||
- nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance); | |||
+ nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance); | |||
// binding recurses into argument list | |||
- nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance); | |||
+ nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance); | |||
+ | |||
+ if (i->any_unbound) | |||
+ *any_unbound = 1; | |||
} | |||
return nrefs; | |||
} | |||
+static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) { | |||
+ int any_unbound; | |||
+ return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance); | |||
+} | |||
+ | |||
static int block_bind_each(block binder, block body, int bindflags) { | |||
assert(block_has_only_binders(binder, bindflags)); | |||
bindflags |= OP_HAS_BINDING; | |||
@@ -550,6 +569,7 @@ block gen_function(const char* name, blo | |||
} | |||
i->subfn = body; | |||
i->symbol = strdup(name); | |||
+ i->any_unbound = -1; | |||
i->arglist = formals; | |||
block b = inst_block(i); | |||
block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0); | |||
@@ -1081,6 +1101,7 @@ block gen_cbinding(const struct cfunctio | |||
inst* i = inst_new(CLOSURE_CREATE_C); | |||
i->imm.cfunc = &cfunctions[cfunc]; | |||
i->symbol = strdup(i->imm.cfunc->name); | |||
+ i->any_unbound = 0; | |||
code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO); | |||
} | |||
return code; |
@ -0,0 +1,85 @@ | |||
From aab54373e9406ee2a154b8d6166b3045aa3484ee Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Sat, 9 Feb 2019 17:24:18 -0500 | |||
Subject: [PATCH 2/9] Reimplement block_drop_unreferenced in linear time | |||
--- | |||
src/compile.c | 50 +++++++++++++++++++++++++++++--------------------- | |||
1 file changed, 29 insertions(+), 21 deletions(-) | |||
--- a/src/compile.c | |||
+++ b/src/compile.c | |||
@@ -53,6 +53,7 @@ struct inst { | |||
struct inst* bound_by; | |||
char* symbol; | |||
int any_unbound; | |||
+ int referenced; | |||
int nformals; | |||
int nactuals; | |||
@@ -75,6 +76,7 @@ static inst* inst_new(opcode op) { | |||
i->bound_by = 0; | |||
i->symbol = 0; | |||
i->any_unbound = 0; | |||
+ i->referenced = 0; | |||
i->nformals = -1; | |||
i->nactuals = -1; | |||
i->subfn = gen_noop(); | |||
@@ -465,30 +467,36 @@ block block_bind_referenced(block binder | |||
return block_join(refd, body); | |||
} | |||
+static void block_mark_referenced(block body) { | |||
+ int saw_top = 0; | |||
+ for (inst* i = body.last; i; i = i->prev) { | |||
+ if (saw_top && i->bound_by == i && !i->referenced) | |||
+ continue; | |||
+ if (i->op == TOP) { | |||
+ saw_top = 1; | |||
+ } | |||
+ if (i->bound_by) { | |||
+ i->bound_by->referenced = 1; | |||
+ } | |||
+ | |||
+ block_mark_referenced(i->arglist); | |||
+ block_mark_referenced(i->subfn); | |||
+ } | |||
+} | |||
+ | |||
block block_drop_unreferenced(block body) { | |||
- inst* curr; | |||
+ block_mark_referenced(body); | |||
+ | |||
block refd = gen_noop(); | |||
- block unrefd = gen_noop(); | |||
- int drop; | |||
- do { | |||
- drop = 0; | |||
- while ((curr = block_take(&body)) && curr->op != TOP) { | |||
- block b = inst_block(curr); | |||
- if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) { | |||
- unrefd = BLOCK(unrefd, b); | |||
- drop++; | |||
- } else { | |||
- refd = BLOCK(refd, b); | |||
- } | |||
- } | |||
- if (curr && curr->op == TOP) { | |||
- body = BLOCK(inst_block(curr),body); | |||
+ inst* curr; | |||
+ while ((curr = block_take(&body))) { | |||
+ if (curr->bound_by == curr && !curr->referenced) { | |||
+ inst_free(curr); | |||
+ } else { | |||
+ refd = BLOCK(inst_block(curr), refd); | |||
} | |||
- body = BLOCK(refd, body); | |||
- refd = gen_noop(); | |||
- } while (drop != 0); | |||
- block_free(unrefd); | |||
- return body; | |||
+ } | |||
+ return refd; | |||
} | |||
jv block_take_imports(block* body) { |
@ -0,0 +1,78 @@ | |||
From e6676ebbd2ab0a6283d96c797dbe93552c1a222c Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Mon, 18 Feb 2019 21:00:59 -0500 | |||
Subject: [PATCH 3/9] Simplify definition of block_bind_referenced | |||
--- | |||
src/compile.c | 49 ++++++++----------------------------------------- | |||
1 file changed, 8 insertions(+), 41 deletions(-) | |||
--- a/src/compile.c | |||
+++ b/src/compile.c | |||
@@ -317,20 +317,6 @@ static int block_count_actuals(block b) | |||
return args; | |||
} | |||
-static int block_count_refs(block binder, block body) { | |||
- int nrefs = 0; | |||
- for (inst* i = body.first; i; i = i->next) { | |||
- if (i != binder.first && i->bound_by == binder.first) { | |||
- nrefs++; | |||
- } | |||
- // counting recurses into closures | |||
- nrefs += block_count_refs(binder, i->subfn); | |||
- // counting recurses into argument list | |||
- nrefs += block_count_refs(binder, i->arglist); | |||
- } | |||
- return nrefs; | |||
-} | |||
- | |||
static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) { | |||
assert(block_is_single(binder)); | |||
assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD)); | |||
@@ -434,37 +420,18 @@ block block_bind_library(block binder, b | |||
return body; // We don't return a join because we don't want those sticking around... | |||
} | |||
-// Bind binder to body and throw away any defs in binder not referenced | |||
-// (directly or indirectly) from body. | |||
+// Bind binder to body, then throw it away if not referenced. | |||
block block_bind_referenced(block binder, block body, int bindflags) { | |||
+ assert(block_is_single(binder)); | |||
assert(block_has_only_binders(binder, bindflags)); | |||
bindflags |= OP_HAS_BINDING; | |||
- block refd = gen_noop(); | |||
- block unrefd = gen_noop(); | |||
- int nrefs; | |||
- for (int last_kept = 0, kept = 0; ; ) { | |||
- for (inst* curr; (curr = block_take(&binder));) { | |||
- block b = inst_block(curr); | |||
- nrefs = block_bind_each(b, body, bindflags); | |||
- // Check if this binder is referenced from any of the ones we | |||
- // already know are referenced by body. | |||
- nrefs += block_count_refs(b, refd); | |||
- nrefs += block_count_refs(b, body); | |||
- if (nrefs) { | |||
- refd = BLOCK(refd, b); | |||
- kept++; | |||
- } else { | |||
- unrefd = BLOCK(unrefd, b); | |||
- } | |||
- } | |||
- if (kept == last_kept) | |||
- break; | |||
- last_kept = kept; | |||
- binder = unrefd; | |||
- unrefd = gen_noop(); | |||
+ | |||
+ if (block_bind_subblock(binder, body, bindflags, 0) == 0) { | |||
+ block_free(binder); | |||
+ } else { | |||
+ body = BLOCK(binder, body); | |||
} | |||
- block_free(unrefd); | |||
- return block_join(refd, body); | |||
+ return body; | |||
} | |||
static void block_mark_referenced(block body) { |
@ -0,0 +1,40 @@ | |||
From 2e3dbb884199bba6cc07345f6d394f1ac53465ac Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Tue, 19 Feb 2019 00:34:04 -0500 | |||
Subject: [PATCH 4/9] Pass on the error message when rel_path is invalid | |||
"Module path must be a string" is not a useful error message when the | |||
reason the module path isn't a string is that the string was replaced, | |||
for some other reason, by an invalid value carrying its own error message. | |||
Also fixes a few memory leaks on early exits. | |||
--- | |||
src/linker.c | 16 +++++++++++++--- | |||
1 file changed, 13 insertions(+), 3 deletions(-) | |||
--- a/src/linker.c | |||
+++ b/src/linker.c | |||
@@ -138,10 +138,20 @@ static jv jv_basename(jv name) { | |||
// Asummes validated relative path to module | |||
static jv find_lib(jq_state *jq, jv rel_path, jv search, const char *suffix, jv jq_origin, jv lib_origin) { | |||
- if (jv_get_kind(search) != JV_KIND_ARRAY) | |||
- return jv_invalid_with_msg(jv_string_fmt("Module search path must be an array")); | |||
- if (jv_get_kind(rel_path) != JV_KIND_STRING) | |||
+ if (!jv_is_valid(rel_path)) { | |||
+ jv_free(search); | |||
+ return rel_path; | |||
+ } | |||
+ if (jv_get_kind(rel_path) != JV_KIND_STRING) { | |||
+ jv_free(rel_path); | |||
+ jv_free(search); | |||
return jv_invalid_with_msg(jv_string_fmt("Module path must be a string")); | |||
+ } | |||
+ if (jv_get_kind(search) != JV_KIND_ARRAY) { | |||
+ jv_free(rel_path); | |||
+ jv_free(search); | |||
+ return jv_invalid_with_msg(jv_string_fmt("Module search path must be an array")); | |||
+ } | |||
struct stat st; | |||
int ret; |
@ -0,0 +1,50 @@ | |||
From d0fe86177427e0c3bc2cec1436d74472e4b618dd Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Tue, 19 Feb 2019 00:35:40 -0500 | |||
Subject: [PATCH 5/9] Catch .. as the first component of a module path | |||
Only the second and subsequent path components were being checked, which | |||
I guess is theoretically security-relevant. | |||
There's no apparent point to reconstructing the path after splitting it | |||
by adding /s back in, either. | |||
--- | |||
src/linker.c | 9 ++------- | |||
1 file changed, 2 insertions(+), 7 deletions(-) | |||
--- a/src/linker.c | |||
+++ b/src/linker.c | |||
@@ -98,12 +98,9 @@ static jv validate_relpath(jv name) { | |||
return res; | |||
} | |||
jv components = jv_string_split(jv_copy(name), jv_string("/")); | |||
- jv rp = jv_array_get(jv_copy(components), 0); | |||
- components = jv_array_slice(components, 1, jv_array_length(jv_copy(components))); | |||
jv_array_foreach(components, i, x) { | |||
if (!strcmp(jv_string_value(x), "..")) { | |||
jv_free(x); | |||
- jv_free(rp); | |||
jv_free(components); | |||
jv res = jv_invalid_with_msg(jv_string_fmt("Relative paths to modules may not traverse to parent directories (%s)", s)); | |||
jv_free(name); | |||
@@ -111,18 +108,16 @@ static jv validate_relpath(jv name) { | |||
} | |||
if (i > 0 && jv_equal(jv_copy(x), jv_array_get(jv_copy(components), i - 1))) { | |||
jv_free(x); | |||
- jv_free(rp); | |||
jv_free(components); | |||
jv res = jv_invalid_with_msg(jv_string_fmt("module names must not have equal consecutive components: %s", | |||
jv_string_value(name))); | |||
jv_free(name); | |||
return res; | |||
} | |||
- rp = jv_string_concat(rp, jv_string_concat(jv_string("/"), x)); | |||
+ jv_free(x); | |||
} | |||
jv_free(components); | |||
- jv_free(name); | |||
- return rp; | |||
+ return name; | |||
} | |||
// Assumes name has been validated |
@ -0,0 +1,37 @@ | |||
From a114b871e460ef2ddcf7698bc6b18651c976626a Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Tue, 19 Feb 2019 00:14:53 -0500 | |||
Subject: [PATCH 6/9] Replace TOP-before-imports special case with assert | |||
The case isn't actually possible afaict. | |||
--- | |||
src/compile.c | 11 ++++------- | |||
1 file changed, 4 insertions(+), 7 deletions(-) | |||
--- a/src/compile.c | |||
+++ b/src/compile.c | |||
@@ -469,10 +469,10 @@ block block_drop_unreferenced(block body | |||
jv block_take_imports(block* body) { | |||
jv imports = jv_array(); | |||
- inst* top = NULL; | |||
- if (body->first && body->first->op == TOP) { | |||
- top = block_take(body); | |||
- } | |||
+ /* Parser should never generate TOP before imports */ | |||
+ assert(!(body->first && body->first->op == TOP && body->first->next && | |||
+ (body->first->next->op == MODULEMETA || body->first->next->op == DEPS))); | |||
+ | |||
while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) { | |||
inst* dep = block_take(body); | |||
if (dep->op == DEPS) { | |||
@@ -480,9 +480,6 @@ jv block_take_imports(block* body) { | |||
} | |||
inst_free(dep); | |||
} | |||
- if (top) { | |||
- *body = block_join(inst_block(top),*body); | |||
- } | |||
return imports; | |||
} | |||
@ -0,0 +1,39 @@ | |||
From 90b92d8c73446bb50eee14ca8d88c5224002001a Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Sun, 15 Oct 2017 01:57:17 -0400 | |||
Subject: [PATCH 7/9] Add import metadata key "optional" | |||
A library marked optional is imported if found, but silently skipped if missing. | |||
This is the desired semantic for the auto-include at ~/.jq | |||
--- | |||
src/linker.c | 9 ++++++++- | |||
1 file changed, 8 insertions(+), 1 deletion(-) | |||
--- a/src/linker.c | |||
+++ b/src/linker.c | |||
@@ -246,6 +246,9 @@ static int process_dependencies(jq_state | |||
jv v = jv_object_get(jv_copy(dep), jv_string("raw")); | |||
if (jv_get_kind(v) == JV_KIND_TRUE) | |||
raw = 1; | |||
+ int optional = 0; | |||
+ if (jv_get_kind(jv_object_get(jv_copy(dep), jv_string("optional"))) == JV_KIND_TRUE) | |||
+ optional = 1; | |||
jv_free(v); | |||
jv relpath = validate_relpath(jv_object_get(jv_copy(dep), jv_string("relpath"))); | |||
jv as = jv_object_get(jv_copy(dep), jv_string("as")); | |||
@@ -259,10 +262,14 @@ static int process_dependencies(jq_state | |||
jv resolved = find_lib(jq, relpath, search, is_data ? ".json" : ".jq", jv_copy(jq_origin), jv_copy(lib_origin)); | |||
// XXX ...move the rest of this into a callback. | |||
if (!jv_is_valid(resolved)) { | |||
+ jv_free(as); | |||
+ if (optional) { | |||
+ jv_free(resolved); | |||
+ continue; | |||
+ } | |||
jv emsg = jv_invalid_get_msg(resolved); | |||
jq_report_error(jq, jv_string_fmt("jq: error: %s\n",jv_string_value(emsg))); | |||
jv_free(emsg); | |||
- jv_free(as); | |||
jv_free(deps); | |||
jv_free(jq_origin); | |||
jv_free(lib_origin); |
@ -0,0 +1,99 @@ | |||
From 4c5a08b9e01ebfce5c8914dd82c1722737bbecab Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Tue, 19 Feb 2019 00:39:34 -0500 | |||
Subject: [PATCH 8/9] Load ~/.jq as a library instead of with builtins | |||
Remove the special code which loads ~/.jq in builtin.c, and instead glue | |||
an optional include which points to the same file onto the main program | |||
in linker.c. | |||
Fixes a minor bug where errors in ~/.jq would be labelled <builtin>. | |||
--- | |||
src/builtin.c | 44 +++++++------------------------------------- | |||
src/linker.c | 10 ++++++++++ | |||
2 files changed, 17 insertions(+), 37 deletions(-) | |||
--- a/src/builtin.c | |||
+++ b/src/builtin.c | |||
@@ -1706,9 +1706,7 @@ static block bind_bytecoded_builtins(blo | |||
return block_bind(builtins, b, OP_IS_CALL_PSEUDO); | |||
} | |||
- | |||
- | |||
-static const char* const jq_builtins = | |||
+static const char jq_builtins[] = | |||
/* Include jq-coded builtins */ | |||
#include "src/builtin.inc" | |||
@@ -1744,45 +1742,17 @@ static block gen_builtin_list(block buil | |||
return BLOCK(builtins, gen_function("builtins", gen_noop(), gen_const(list))); | |||
} | |||
-static int builtins_bind_one(jq_state *jq, block* bb, const char* code) { | |||
- struct locfile* src; | |||
- src = locfile_init(jq, "<builtin>", code, strlen(code)); | |||
- block funcs; | |||
- int nerrors = jq_parse_library(src, &funcs); | |||
- if (nerrors == 0) { | |||
- *bb = block_bind(funcs, *bb, OP_IS_CALL_PSEUDO); | |||
- } | |||
- locfile_free(src); | |||
- return nerrors; | |||
-} | |||
- | |||
-static int slurp_lib(jq_state *jq, block* bb) { | |||
- int nerrors = 0; | |||
- char* home = getenv("HOME"); | |||
- if (home) { // silently ignore no $HOME | |||
- jv filename = jv_string_append_str(jv_string(home), "/.jq"); | |||
- jv data = jv_load_file(jv_string_value(filename), 1); | |||
- if (jv_is_valid(data)) { | |||
- nerrors = builtins_bind_one(jq, bb, jv_string_value(data) ); | |||
- } | |||
- jv_free(filename); | |||
- jv_free(data); | |||
- } | |||
- return nerrors; | |||
-} | |||
- | |||
int builtins_bind(jq_state *jq, block* bb) { | |||
- block builtins = gen_noop(); | |||
- int nerrors = slurp_lib(jq, bb); | |||
- if (nerrors) { | |||
- block_free(*bb); | |||
- return nerrors; | |||
- } | |||
- nerrors = builtins_bind_one(jq, &builtins, jq_builtins); | |||
+ block builtins; | |||
+ struct locfile* src = locfile_init(jq, "<builtin>", jq_builtins, sizeof(jq_builtins)-1); | |||
+ int nerrors = jq_parse_library(src, &builtins); | |||
assert(!nerrors); | |||
+ locfile_free(src); | |||
+ | |||
builtins = bind_bytecoded_builtins(builtins); | |||
builtins = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), builtins); | |||
builtins = gen_builtin_list(builtins); | |||
+ | |||
*bb = block_bind(builtins, *bb, OP_IS_CALL_PSEUDO); | |||
*bb = block_drop_unreferenced(*bb); | |||
return nerrors; | |||
--- a/src/linker.c | |||
+++ b/src/linker.c | |||
@@ -387,6 +387,16 @@ int load_program(jq_state *jq, struct lo | |||
if (nerrors) | |||
return nerrors; | |||
+ char* home = getenv("HOME"); | |||
+ if (home) { // silently ignore no $HOME | |||
+ /* Import ~/.jq as a library named "" found in $HOME */ | |||
+ block import = gen_import_meta(gen_import("", NULL, 0), | |||
+ gen_const(JV_OBJECT( | |||
+ jv_string("optional"), jv_true(), | |||
+ jv_string("search"), jv_string(home)))); | |||
+ program = BLOCK(import, program); | |||
+ } | |||
+ | |||
nerrors = process_dependencies(jq, jq_get_jq_origin(jq), jq_get_prog_origin(jq), &program, &lib_state); | |||
block libs = gen_noop(); | |||
for (uint64_t i = 0; i < lib_state.ct; ++i) { |
@ -0,0 +1,170 @@ | |||
From 916c12fb593005771a6ce098f5a7da4dec0051d1 Mon Sep 17 00:00:00 2001 | |||
From: Muh Muhten <muh.muhten@gmail.com> | |||
Date: Wed, 20 Feb 2019 01:48:56 -0500 | |||
Subject: [PATCH 9/9] Make builtin binding fast again by binding only | |||
referenced symbols | |||
Avoid doing the internal binding of top-level symbols in the parser, | |||
leaving that work to be done in a post-processing step. For builtins, | |||
this lets us do a reference-aware bind step (block_bind_incremental) | |||
*after* generating builtins/0. | |||
Libraries are a bit trickier since they may be bound multiple times, so | |||
instead of thinking through the implications I added (block_bind_self) | |||
to resolve all internal symbols immediately. | |||
--- | |||
src/builtin.c | 4 ++-- | |||
src/compile.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- | |||
src/compile.h | 3 ++- | |||
src/linker.c | 1 + | |||
src/parser.c | 2 +- | |||
src/parser.y | 2 +- | |||
6 files changed, 54 insertions(+), 9 deletions(-) | |||
--- a/src/builtin.c | |||
+++ b/src/builtin.c | |||
@@ -1703,7 +1703,7 @@ static block bind_bytecoded_builtins(blo | |||
BLOCK(gen_param("start"), gen_param("end")), | |||
range)); | |||
} | |||
- return block_bind(builtins, b, OP_IS_CALL_PSEUDO); | |||
+ return BLOCK(builtins, b); | |||
} | |||
static const char jq_builtins[] = | |||
@@ -1753,7 +1753,7 @@ int builtins_bind(jq_state *jq, block* b | |||
builtins = gen_cbinding(function_list, sizeof(function_list)/sizeof(function_list[0]), builtins); | |||
builtins = gen_builtin_list(builtins); | |||
- *bb = block_bind(builtins, *bb, OP_IS_CALL_PSEUDO); | |||
+ *bb = block_bind_incremental(builtins, *bb, OP_IS_CALL_PSEUDO); | |||
*bb = block_drop_unreferenced(*bb); | |||
return nerrors; | |||
} | |||
--- a/src/compile.c | |||
+++ b/src/compile.c | |||
@@ -222,8 +222,9 @@ block gen_op_unbound(opcode op, const ch | |||
block gen_op_var_fresh(opcode op, const char* name) { | |||
assert(opcode_describe(op)->flags & OP_HAS_VARIABLE); | |||
- return block_bind(gen_op_unbound(op, name), | |||
- gen_noop(), OP_HAS_VARIABLE); | |||
+ block b = gen_op_unbound(op, name); | |||
+ b.first->bound_by = b.first; | |||
+ return b; | |||
} | |||
block gen_op_bound(opcode op, block binder) { | |||
@@ -382,7 +383,7 @@ static int block_bind_each(block binder, | |||
return nrefs; | |||
} | |||
-block block_bind(block binder, block body, int bindflags) { | |||
+static block block_bind(block binder, block body, int bindflags) { | |||
block_bind_each(binder, body, bindflags); | |||
return block_join(binder, body); | |||
} | |||
@@ -434,6 +435,48 @@ block block_bind_referenced(block binder | |||
return body; | |||
} | |||
+static inst* block_take_last(block* b) { | |||
+ inst* i = b->last; | |||
+ if (i == 0) | |||
+ return 0; | |||
+ if (i->prev) { | |||
+ i->prev->next = i->next; | |||
+ b->last = i->prev; | |||
+ i->prev = 0; | |||
+ } else { | |||
+ b->first = 0; | |||
+ b->last = 0; | |||
+ } | |||
+ return i; | |||
+} | |||
+ | |||
+// Binds a sequence of binders, which *must not* already be bound to each other, | |||
+// to body, throwing away unreferenced defs | |||
+block block_bind_incremental(block binder, block body, int bindflags) { | |||
+ assert(block_has_only_binders(binder, bindflags)); | |||
+ bindflags |= OP_HAS_BINDING; | |||
+ | |||
+ inst* curr; | |||
+ while ((curr = block_take_last(&binder))) { | |||
+ body = block_bind_referenced(inst_block(curr), body, bindflags); | |||
+ } | |||
+ return body; | |||
+} | |||
+ | |||
+block block_bind_self(block binder, int bindflags) { | |||
+ assert(block_has_only_binders(binder, bindflags)); | |||
+ bindflags |= OP_HAS_BINDING; | |||
+ block body = gen_noop(); | |||
+ | |||
+ inst* curr; | |||
+ while ((curr = block_take_last(&binder))) { | |||
+ block b = inst_block(curr); | |||
+ block_bind_subblock(b, body, bindflags, 0); | |||
+ body = BLOCK(b, body); | |||
+ } | |||
+ return body; | |||
+} | |||
+ | |||
static void block_mark_referenced(block body) { | |||
int saw_top = 0; | |||
for (inst* i = body.last; i; i = i->prev) { | |||
@@ -1074,7 +1117,7 @@ block gen_cbinding(const struct cfunctio | |||
i->imm.cfunc = &cfunctions[cfunc]; | |||
i->symbol = strdup(i->imm.cfunc->name); | |||
i->any_unbound = 0; | |||
- code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO); | |||
+ code = BLOCK(inst_block(i), code); | |||
} | |||
return code; | |||
} | |||
--- a/src/compile.h | |||
+++ b/src/compile.h | |||
@@ -72,9 +72,10 @@ int block_has_only_binders(block, int bi | |||
int block_has_main(block); | |||
int block_is_funcdef(block b); | |||
int block_is_single(block b); | |||
-block block_bind(block binder, block body, int bindflags); | |||
block block_bind_library(block binder, block body, int bindflags, const char* libname); | |||
block block_bind_referenced(block binder, block body, int bindflags); | |||
+block block_bind_incremental(block binder, block body, int bindflags); | |||
+block block_bind_self(block binder, int bindflags); | |||
block block_drop_unreferenced(block body); | |||
jv block_take_imports(block* body); | |||
--- a/src/linker.c | |||
+++ b/src/linker.c | |||
@@ -336,6 +336,7 @@ static int load_library(jq_state *jq, jv | |||
jv_string(dirname(lib_origin)), | |||
&program, lib_state); | |||
free(lib_origin); | |||
+ program = block_bind_self(program, OP_IS_CALL_PSEUDO); | |||
} | |||
} | |||
state_idx = lib_state->ct++; | |||
--- a/src/parser.c | |||
+++ b/src/parser.c | |||
@@ -2425,7 +2425,7 @@ yyreduce: | |||
case 9: | |||
#line 333 "src/parser.y" /* yacc.c:1646 */ | |||
{ | |||
- (yyval.blk) = block_bind((yyvsp[-1].blk), (yyvsp[0].blk), OP_IS_CALL_PSEUDO); | |||
+ (yyval.blk) = block_join((yyvsp[-1].blk), (yyvsp[0].blk)); | |||
} | |||
#line 2431 "src/parser.c" /* yacc.c:1646 */ | |||
break; | |||
--- a/src/parser.y | |||
+++ b/src/parser.y | |||
@@ -331,7 +331,7 @@ FuncDefs: | |||
$$ = gen_noop(); | |||
} | | |||
FuncDef FuncDefs { | |||
- $$ = block_bind($1, $2, OP_IS_CALL_PSEUDO); | |||
+ $$ = block_join($1, $2); | |||
} | |||
Exp: |