Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion vm/ByteCodeTranslator/spotbugs-exclude.xml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,12 @@
<Bug pattern="EQ_GETCLASS_AND_CLASS_CONSTANT" />
</Match>

<!-- Parser ingests input files and uses the same CLI-exit pattern. -->
<!--
Parser ingests input files and uses the same CLI-exit pattern. The native-symbol
and name->class indexes are lazily built static caches; they are safe here because
the translator runs single-threaded across one translation run (same rationale as
the ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD exclusions above).
-->
<Match>
<Class name="~com\.codename1\.tools\.translator\.Parser(\$.*)?" />
<Or>
Expand All @@ -111,6 +116,8 @@
<Bug pattern="OS_OPEN_STREAM_EXCEPTION_PATH" />
<Bug pattern="OBL_UNSATISFIED_OBLIGATION_EXCEPTION_EDGE" />
<Bug pattern="DLS_DEAD_LOCAL_STORE" />
<Bug pattern="LI_LAZY_INIT_STATIC" />
<Bug pattern="LI_LAZY_INIT_UPDATE_STATIC" />
</Or>
</Match>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,12 +411,10 @@ public static List<ByteCodeClass> clearUnmarked(List<ByteCodeClass> lst) {
}

private ByteCodeClass findClass(String s, List<ByteCodeClass> lst) {
for(ByteCodeClass c : lst) {
if(c.clsName.equals(s)) {
return c;
}
}
return null;
// lst is always Parser.classes here (markDependencies -> markDependent), so
// the shared name index gives the same first-match result in O(1) instead of
// the old O(N) scan that ran per dependency per class during marking.
return Parser.getClassObject(s);
}

public void updateAllDependencies() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -587,25 +587,21 @@ public boolean isMethodUsedByNative(String[] nativeSources, ByteCodeClass cls) {



// check native code
// check native code: O(|symbol|) lookups against the inverted index instead
// of an O(native_bytes) substring scan per method. Semantics are identical --
// the index answers "is X a substring of the native source text".
StringBuilder b = new StringBuilder();
this.appendFunctionPointer(b);
String str = b.toString();
boolean foundClassName = false;
for(String s : nativeSources) {
if (cls != null && !foundClassName && s.contains(clsName)) {
// For later we record whether the class is used.
foundClassName = true;
}
if(s.contains(str)) {
usedByNative = true;
if (cls != null) {
cls.setUsedByNative(true);
}
return true;
NativeSymbolIndex idx = Parser.getNativeSymbolIndex(nativeSources);
if (idx.contains(str)) {
usedByNative = true;
if (cls != null) {
cls.setUsedByNative(true);
}
return true;
}
if (!foundClassName && cls != null) {
if (cls != null && !idx.contains(clsName)) {
// We didn't find the class at all.
// Let's record that as it will save us time
// when looking up other methods in this class.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package com.codename1.tools.translator;

import java.util.HashMap;
import java.util.HashSet;

/**
 * Inverts the "is this symbol referenced by native code" scan.
 *
 * The old code did, per Java method/class, a full substring scan of every
 * native source string -- O(methods x native_bytes). Native symbols are CN1
 * mangled identifiers ([A-Za-z0-9_]+), and an identifier-only query X matches
 * the native text iff X is a substring of some maximal identifier token in it.
 * So we:
 *   1. tokenize all native sources into the DISTINCT set of identifier tokens
 *      (this dedups repeated symbols across files, bounding the structure),
 *   2. build a suffix automaton over those tokens (joined by '\n' so an
 *      identifier query can never span two tokens), giving O(|X|) membership.
 *
 * Result: native cost becomes O(native_bytes) once + O(|X|) per identifier
 * query, instead of O(methods x native_bytes).
 *
 * Queries that are empty or contain a non-identifier character cannot be
 * answered by the token automaton (they would either be rejected outright or,
 * for '\n', match the internal separator). Those rare queries fall back to a
 * plain substring scan of the raw sources, so {@link #contains(String)} always
 * agrees with "some non-null source string contains pat".
 */
final class NativeSymbolIndex {
    /**
     * The array handed to the constructor, kept by reference (not copied) for
     * the raw-scan fallback. The automaton is a snapshot taken at construction,
     * so callers are expected not to mutate the array afterwards.
     */
    private final String[] sources;
    /** len[s]: length of the longest string that reaches automaton state s. */
    private final int[] len;
    /** link[s]: suffix link of state s, -1 for the initial state. */
    private final int[] link;
    /** next[s]: outgoing transitions of state s, keyed by character. */
    private final HashMap<Character, Integer>[] next;
    /** State reached by the whole text consumed so far (construction only). */
    private int last;
    /** Number of states allocated so far. */
    private int sz;

    /**
     * Builds the index over the given native sources.
     *
     * @param nativeSources native source texts; the array itself and any of its
     *                      elements may be null, in which case they are skipped
     */
    NativeSymbolIndex(String[] nativeSources) {
        sources = nativeSources;
        String text = joinDistinctTokens(nativeSources);

        // A suffix automaton over n characters has fewer than 2n states; the
        // extra slack keeps the empty-text case trivially in bounds.
        int cap = 2 * Math.max(1, text.length()) + 5;
        len = new int[cap];
        link = new int[cap];
        @SuppressWarnings("unchecked") // generic array creation; only ever holds HashMap<Character, Integer>
        HashMap<Character, Integer>[] table = new HashMap[cap];
        next = table;
        sz = 0;
        last = newState(0, -1);
        for (int i = 0; i < text.length(); i++) {
            extend(text.charAt(i));
        }
    }

    /**
     * Collects the distinct maximal identifier tokens of all sources and joins
     * them into one text, each token terminated by '\n'.
     */
    private static String joinDistinctTokens(String[] nativeSources) {
        HashSet<String> tokens = new HashSet<String>();
        if (nativeSources != null) {
            for (String s : nativeSources) {
                if (s == null) {
                    continue;
                }
                int n = s.length();
                int i = 0;
                while (i < n) {
                    if (isIdent(s.charAt(i))) {
                        int j = i + 1;
                        while (j < n && isIdent(s.charAt(j))) {
                            j++;
                        }
                        tokens.add(s.substring(i, j));
                        i = j;
                    } else {
                        i++;
                    }
                }
            }
        }
        StringBuilder sb = new StringBuilder();
        for (String t : tokens) {
            sb.append(t).append('\n');
        }
        return sb.toString();
    }

    /** Allocates a fresh state with the given length and suffix link. */
    private int newState(int l, int lnk) {
        len[sz] = l;
        link[sz] = lnk;
        next[sz] = new HashMap<Character, Integer>();
        return sz++;
    }

    /** Standard online suffix-automaton extension by one character. */
    private void extend(char c) {
        int cur = newState(len[last] + 1, -1);
        int p = last;
        while (p != -1 && !next[p].containsKey(c)) {
            next[p].put(c, cur);
            p = link[p];
        }
        if (p == -1) {
            link[cur] = 0;
        } else {
            int q = next[p].get(c);
            if (len[p] + 1 == len[q]) {
                link[cur] = q;
            } else {
                // q represents strings longer than len[p] + 1: split it.
                int clone = newState(len[p] + 1, link[q]);
                next[clone].putAll(next[q]);
                while (p != -1) {
                    Integer t = next[p].get(c);
                    if (t == null || t != q) {
                        break;
                    }
                    next[p].put(c, clone);
                    p = link[p];
                }
                link[q] = clone;
                link[cur] = clone;
            }
        }
        last = cur;
    }

    /**
     * Tests whether {@code pat} occurs in the native sources.
     *
     * Identifier-only patterns are answered by the automaton in O(|pat|).
     * Empty patterns and patterns containing a non-identifier character are
     * answered by scanning the raw sources, so the internal '\n' separators
     * can never produce a match.
     *
     * @param pat the text to look for; must not be null
     * @return true iff some non-null source string contains {@code pat}
     */
    boolean contains(String pat) {
        int n = pat.length();
        if (n == 0) {
            return rawContains(pat);
        }
        int cur = 0;
        for (int i = 0; i < n; i++) {
            char c = pat.charAt(i);
            if (!isIdent(c)) {
                // The identifier run before c was found inside some token, but
                // only the raw text can tell what follows that run.
                return rawContains(pat);
            }
            Integer nx = next[cur].get(c);
            if (nx == null) {
                // An identifier run absent from every token is absent from the
                // raw text too, so the whole pattern cannot occur.
                return false;
            }
            cur = nx;
        }
        return true;
    }

    /** Plain substring scan over the raw sources, skipping null entries. */
    private boolean rawContains(String pat) {
        if (sources == null) {
            return false;
        }
        for (String s : sources) {
            if (s != null && s.contains(pat)) {
                return true;
            }
        }
        return false;
    }

    /** True for the characters that may appear in an identifier token. */
    private static boolean isIdent(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9') || c == '_';
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ public class Parser extends ClassVisitor {
public static void cleanup() {
nativeSources = null;
classes.clear();
// classes is cleared in place (same List reference), so the name index's
// (reference, size) guard cannot detect a subsequent same-size refill across
// translation runs in the same JVM (e.g. the unit tests). Invalidate it here.
classIndexMap = null;
classIndexSource = null;
classIndexSize = -1;
dependencyGraph.clear();
BytecodeMethod.setDependencyGraph(null);
ByteCodeClass.cleanup();
Expand All @@ -86,13 +92,7 @@ public static void parse(File sourceFile) throws Exception {
}

private static ByteCodeClass getClassByName(String name) {
name = name.replace('/', '_').replace('$', '_');
for(ByteCodeClass bc : classes) {
if(bc.getClsName().equals(name)) {
return bc;
}
}
return null;
return classIndex().get(name.replace('/', '_').replace('$', '_'));
}

/**
Expand Down Expand Up @@ -285,15 +285,50 @@ private static void appendClassOffset(ByteCodeClass bc, List<Integer> clsIds) {
}
}

// Inverted index over the native sources for O(|symbol|) "is this symbol referenced
// by native code" queries (see NativeSymbolIndex). Built lazily and cached
// against the nativeSources array identity, mirroring the per-method memo in
// BytecodeMethod.isMethodUsedByNative.
private static NativeSymbolIndex nativeSymbolIndex;
private static String[] nativeSymbolIndexSources;
/**
 * Returns the native-symbol index for the given sources, building it on first
 * use and rebuilding it whenever a different array instance is passed in.
 *
 * The cache key is the array reference, not its contents.
 *
 * @param nativeSources the native source texts to index
 * @return the cached index when it was built from this same array instance,
 *         otherwise a freshly built one (which then becomes the cached index)
 */
public static NativeSymbolIndex getNativeSymbolIndex(String[] nativeSources) {
    boolean cacheHit = nativeSymbolIndex != null && nativeSymbolIndexSources == nativeSources;
    if (cacheHit) {
        return nativeSymbolIndex;
    }
    NativeSymbolIndex rebuilt = new NativeSymbolIndex(nativeSources);
    nativeSymbolIndex = rebuilt;
    nativeSymbolIndexSources = nativeSources;
    return rebuilt;
}

private static final ArrayList<String> constantPool = new ArrayList<>();

public static ByteCodeClass getClassObject(String name) {
for(ByteCodeClass cls : classes) {
if(cls.getClsName().equals(name)) {
return cls;
// Name -> class index, replacing the O(N) linear scans that getClassObject /
// getClassByName / ByteCodeClass.findClass used to do. Those run per dependency
// per class during the dead-code cull, so the scans were O(N^2) per pass.
// Rebuilt lazily when `classes` changes. `classes` is only ever reassigned (new
// reference) or grown in place via add()/cleared -- it is never mutated to a
// same-reference, same-size, different-content state -- so the (reference, size)
// pair uniquely identifies its state and makes this self-correcting.
private static HashMap<String, ByteCodeClass> classIndexMap;
private static List<ByteCodeClass> classIndexSource;
private static int classIndexSize;
private static HashMap<String, ByteCodeClass> classIndex() {
if (classIndexMap == null || classIndexSource != classes || classIndexSize != classes.size()) {
HashMap<String, ByteCodeClass> m = new HashMap<String, ByteCodeClass>(classes.size() * 2);
for (ByteCodeClass cls : classes) {
// first-wins, matching the old "return the first match" linear scan
if (!m.containsKey(cls.getClsName())) {
m.put(cls.getClsName(), cls);
}
}
classIndexMap = m;
classIndexSource = classes;
classIndexSize = classes.size();
}
return null;
return classIndexMap;
}

/**
 * Looks up a class by name through the shared name index.
 *
 * @param name the class name to look up; used as the index key as-is
 * @return the indexed class, or null when the index has no entry for that name
 */
public static ByteCodeClass getClassObject(String name) {
    final HashMap<String, ByteCodeClass> byName = classIndex();
    return byName.get(name);
}

/**
Expand Down
Loading