Reduce the number of JNI calls while creating a Partition
Speed up partition creation by reducing the number of calls to CQI.cqpQuery()
[WIP]
The partition creation can be long when there are a lot of parts.
Profiling the process shows that the bottleneck is the native method
org.txm.searchengine.cqp.MemCqiServer.cqpQuery(String, String, String)
The tests below are recorded for reference. Their results were not really
significant on Windows (perhaps 5 to 10% faster than the current method on a
partition with 2400 parts), and such measurements are tricky. The idea is to
reduce the number of native calls made from Java, which can be costly through
JNI, by defining a single native method that executes all the queries and by
centralizing the creation of all the parts on the Java side (rather than
calling a native method for each part).
steps, Java side:
-
create org.txm.searchengine.cqp.MemCqiServer.cqpQueries(String, String[], String[]) public native Boolean cqpQueries(String arg0, String[] arg1, String[] arg2) throws IOException, UnexpectedAnswerException, CqiServerError ;
-
create org.txm.searchengine.cqp.ICqiClient.cqpQueries(String, String[], String[]) public void cqpQueries(String motherCorpus, String[] subcorpusNamers, String[] queries) throws IOException, UnexpectedAnswerException, CqiServerError;
- create org.txm.searchengine.cqp.MemCqiClient.cqpQueries(String, String[], String[])
/**
 * Runs a batch of CQP queries through the in-memory server in one call.
 *
 * The three arguments are forwarded unchanged to {@code server.cqpQueries}.
 * When the server answers anything other than {@code TRUE} (including
 * {@code null}), its error code is fetched and handed to
 * {@code throwExceptionFromCqi}.
 *
 * @param arg0 first argument forwarded to the server (the mother corpus in the interface declaration)
 * @param arg1 second argument forwarded to the server (the subcorpus names in the interface declaration)
 * @param arg2 third argument forwarded to the server (the queries in the interface declaration)
 */
@Override
public void cqpQueries(String arg0, String[] arg1, String[] arg2) throws IOException, UnexpectedAnswerException, CqiServerError {
	final Boolean succeeded = server.cqpQueries(arg0, arg1, arg2);
	if (Boolean.TRUE.equals(succeeded)) {
		return;
	}
	// null or FALSE: let the shared helper turn the server error code into an exception
	final int errorCode = server.getErrorCode();
	throwExceptionFromCqi(errorCode);
}
-
create org.txm.searchengine.cqcorpus.Partition.createParts(String, List, List) private ArrayList createParts(String partitionName, List partNames, List queries) throws CqiClientException {
ArrayList<Part> parts = new ArrayList<Part>(partNames.size()); //Log.finest(NLS.bind(Messages.CREATING_PART,partName, query)); // long start = System.currentTimeMillis(); ArrayList<String> cqpPartIds = new ArrayList<String>(partNames.size()); for(int i = 0; i < partNames.size(); i++) { try { String partCqpId = CqpObject.partNamePrefix + Corpus.getNextSubcorpusCounter(); cqpPartIds.add(partCqpId); parts.add(new Part(partCqpId, partitionName, partNames.get(i), this, new Query(queries.get(i)))); } catch(InvalidCqpIdException e) { // TODO Auto-generated catch block e.printStackTrace(); } } try { CorpusManager.getCorpusManager().getCqiClient().cqpQueries(this.corpus.getQualifiedCqpId(), cqpPartIds.toArray(new String[cqpPartIds.size()]), queries.toArray(new String[queries.size()])); } catch (Exception e) { try { throw new CqiClientException(Messages.Partition_9 + partitionName + " last error: "+Toolbox.getCqiClient().getLastCQPError()); //$NON-NLS-1$ //$NON-NLS-2$ } catch (Exception e1) { System.out.println(Messages.Partition_18+e1); org.txm.utils.logger.Log.printStackTrace(e1); return null; } } // long end = System.currentTimeMillis(); //Log.finest(NLS.bind(Messages.PART_CREATED, partitionName + "_" + partName, (end - start))); //$NON-NLS-1$//$NON-NLS-2$ return parts; } -
create org.txm.searchengine.cqcorpus.Partition.Partition(Corpus, String, List, List) public Partition(Corpus corpus, String name, List queries, List partnames) throws CqiClientException { this.corpus = corpus; this.structure = null; this.property = null; if (name == null || name.trim().length() == 0) name = "noname"; //NON-NLS-1 this.name = name; Log.info(NLS.bind(Messages.NEW_PARTION, this.corpus, this.name)); long start = System.currentTimeMillis(); this.parts = new ArrayList();
// FIXME: tests optimisations CQP LIB parts.addAll(createParts(name, partnames, queries)); // for (int i = 0; i < queries.size(); i++) { // String queryS = queries.get(i); // String partitionName = this.getName(); // String partName = partnames.get(i); // if (partName.trim().length() == 0) partName = "-"; //$NON-NLS-1$ // Part part = createPart(partitionName, partName, queryS); // parts.add(part); // } // // long end = System.currentTimeMillis(); Log.info(NLS.bind(Messages.PARTITION_CREATED, this.name, (end - start))); }
Steps, native side:
- create JNIEXPORT jobject JNICALL
Java_org_txm_searchengine_cqp_MemCqiServer_cqpQueries
(JNIEnv *, jobject, jstring, jobjectArray, jobjectArray); in MemCqiServer.h/MemCqiServer.c
JNIEXPORT jobject JNICALL Java_org_txm_searchengine_cqp_MemCqiServer_cqpQueries
(JNIEnv * env, jobject obj, jstring motherCorpus, jobjectArray subcorpusNames, jobjectArray jqueries) {
char *child, *mother, *query, *c, *sc;
jboolean iscopy;
mother = (*env)->GetStringUTFChars(env, motherCorpus, &iscopy);
int *children = (*env)->GetObjectArrayElement(env, subcorpusNames, NULL);
int childrenCount = (*env)->GetArrayLength(env, subcorpusNames);
int *queries = (*env)->GetObjectArrayElement(env, jqueries, NULL);
if (!split_subcorpus_spec(mother, &c, &sc)) {
(*env)->ReleaseStringChars(env, motherCorpus, mother);
//(*env)->ReleaseStringChars(env, subcorpus, child);
//(*env)->ReleaseStringChars(env, jquery, query);
return throwException(env, obj);
} else {
int test2 = cqi_activate_corpus(mother);
int i;
for(i = 0; i < childrenCount; i++) {
child = (*env)->GetStringUTFChars(env, (*env)->GetObjectArrayElement(env, subcorpusNames, i), &iscopy);
query = (*env)->GetStringUTFChars(env, (*env)->GetObjectArrayElement(env, jqueries, i), &iscopy);
/* printf("\n ******* i = ");
printf("%d", i);
printf(" ******* child ");
printf(child);
printf(" ******* query ");
printf(query);*/
// fflush(stdout);
char *cqp_query;
int len = strlen(child) + strlen(query) + 10;
cqp_query = (char *) cl_realloc(cqp_query, len);
int test1 = check_subcorpus_name(child);
//printf("\ntests results: subcorpus_name=%d activation=%d\n", test1, test2);
if (!test1 || !test2) {
(*env)->ReleaseStringChars(env, motherCorpus, mother);
//(*env)->ReleaseStringChars(env, subcorpus, child);
//(*env)->ReleaseStringChars(env, jquery, query);
return throwException(env, obj);
}
else {
query_lock = floor(1e9 * cl_runif()) + 1; // activate query lock mode with random key
//printf("CQPSERVER: query_lock = %d\n", query_lock);
if (query_has_semicolon(query))
sprintf(cqp_query, "%s = %s", child, query);
else
sprintf(cqp_query, "%s = %s;", child, query);
//printf("CQi: parsing %s\n", cqp_query);
if (!cqp_parse_string(cqp_query)) { // parser and execute
fprintf(stderr, "start of throw exeption");
return throwCLException(env, obj);
//fprintf(stderr, "End of throw exeption");
} else {
char *full_child = combine_subcorpus_spec(c, child); // c is the 'physical' part of the mother corpus
CorpusList *childcl = cqi_find_corpus(full_child);
if ((childcl) == NULL) {
(*env)->ReleaseStringChars(env, motherCorpus, mother);
//(*env)->ReleaseStringChars(env, subcorpus, child);
//(*env)->ReleaseStringChars(env, jquery, query);
return throwCLException(env, obj);
} else {
if (server_log) {
printf("'%s' ran the following query on %s\n", "cqplib", mother);
printf("\t%s\n", cqp_query);
printf("and got %d matches.\n", childcl->size);
}
}
if (full_child) cl_free(full_child);
}
query_lock = 0; // deactivate query lock mode
}
if (cqp_query) cl_free(cqp_query);
//(*env)->ReleaseStringUTFChars(env, string1, child);
//(*env)->ReleaseStringUTFChars(env, string2, query);
}
}
if (c) cl_free(c);
if (sc) cl_free(sc);
(*env)->ReleaseStringChars(env, motherCorpus, mother);
//(*env)->ReleaseStringChars(env, subcorpus, child);
//(*env)->ReleaseStringChars(env, jquery, query);
return toBoolean(env, obj, JNI_TRUE);
}
Other tips:
- we may assume that the Java layer always gives a unique corpus id to the native layer, and remove the check_subcorpus_name() tests
- passing arrays through JNI and getting the array elements seems to be costly; we may use a buffer instead
- manage this test “if (query_has_semicolon(query))” in another way, e.g. by adding a function parameter
(from redmine: issue id 1756, created on 2016/03/29 by Sebastien Jacquot)
- Relations:
- relates #978 (closed)
- relates #957