mirror of
https://github.com/torproject/metrics-lib.git
synced 2024-11-26 18:50:28 +00:00
Optimize parsing large files with many descriptors.
When parsing a large file with many descriptors, we would repeatedly search the remaining file for the sequence "newline + keyword + space" and then "newline + keyword + newline" to find the start of the next descriptor. However, if the keyword is always followed by a newline, the first search would always fail, and it would only fail after scanning the entire remainder of the file. The optimization here is to determine once, up front, whether the keyword is ever followed by a space or by a newline, and to skip the searches that cannot succeed when going through the file. In the long term we should use a better parser, but in the short term this optimization will have a major impact on performance, in particular with regard to concatenated microdescriptors.
This commit is contained in:
parent
ee7c3eb738
commit
ff7e36c156
@ -4,6 +4,9 @@
|
||||
- Parse version 3 onion service statistics contained in extra-info
|
||||
descriptors.
|
||||
|
||||
* Medium changes
|
||||
- Optimize parsing of large files containing many descriptors.
|
||||
|
||||
|
||||
# Changes in version 2.14.0 - 2020-08-07
|
||||
|
||||
|
@ -181,16 +181,25 @@ public class DescriptorParserImpl implements DescriptorParser {
|
||||
String ascii = new String(rawDescriptorBytes, StandardCharsets.US_ASCII);
|
||||
boolean containsAnnotations = ascii.startsWith("@")
|
||||
|| ascii.contains(NL + "@");
|
||||
boolean containsKeywordSpace = ascii.startsWith(key.keyword + SP)
|
||||
|| ascii.contains(NL + key.keyword + SP);
|
||||
boolean containsKeywordNewline = ascii.startsWith(key.keyword + NL)
|
||||
|| ascii.contains(NL + key.keyword + NL);
|
||||
while (startAnnotations < endAllDescriptors) {
|
||||
int startDescriptor;
|
||||
if (startAnnotations == ascii.indexOf(key.keyword + SP,
|
||||
startAnnotations) || startAnnotations == ascii.indexOf(
|
||||
key.keyword + NL)) {
|
||||
int startDescriptor = -1;
|
||||
if ((containsKeywordSpace
|
||||
&& startAnnotations == ascii.indexOf(key.keyword + SP,
|
||||
startAnnotations))
|
||||
|| (containsKeywordNewline
|
||||
&& startAnnotations == ascii.indexOf(key.keyword + NL,
|
||||
startAnnotations))) {
|
||||
startDescriptor = startAnnotations;
|
||||
} else {
|
||||
startDescriptor = ascii.indexOf(NL + key.keyword + SP,
|
||||
startAnnotations - 1);
|
||||
if (startDescriptor < 0) {
|
||||
if (containsKeywordSpace) {
|
||||
startDescriptor = ascii.indexOf(NL + key.keyword + SP,
|
||||
startAnnotations - 1);
|
||||
}
|
||||
if (startDescriptor < 0 && containsKeywordNewline) {
|
||||
startDescriptor = ascii.indexOf(NL + key.keyword + NL,
|
||||
startAnnotations - 1);
|
||||
}
|
||||
@ -204,10 +213,10 @@ public class DescriptorParserImpl implements DescriptorParser {
|
||||
if (containsAnnotations) {
|
||||
endDescriptor = ascii.indexOf(NL + "@", startDescriptor);
|
||||
}
|
||||
if (endDescriptor < 0) {
|
||||
if (endDescriptor < 0 && containsKeywordSpace) {
|
||||
endDescriptor = ascii.indexOf(NL + key.keyword + SP, startDescriptor);
|
||||
}
|
||||
if (endDescriptor < 0) {
|
||||
if (endDescriptor < 0 && containsKeywordNewline) {
|
||||
endDescriptor = ascii.indexOf(NL + key.keyword + NL, startDescriptor);
|
||||
}
|
||||
if (endDescriptor < 0) {
|
||||
|
Loading…
Reference in New Issue
Block a user