Skip to content

Commit

Permalink
Run token sequence normalization without topological sorting whenever…
Browse files Browse the repository at this point in the history
… match merging is enabled to prevent interference.
  • Loading branch information
tsaglam committed Jul 16, 2024
1 parent 7b0d246 commit 0f73cc0
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 25 deletions.
3 changes: 2 additions & 1 deletion core/src/main/java/de/jplag/JPlag.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ public static JPlagResult run(JPlagOptions options) throws ExitException {
SubmissionSetBuilder builder = new SubmissionSetBuilder(options);
SubmissionSet submissionSet = builder.buildSubmissionSet();
if (options.normalize() && options.language().supportsNormalization() && options.language().requiresCoreNormalization()) {
submissionSet.normalizeSubmissions();
boolean normalizeOrder = !options.mergingOptions().enabled(); // match merging conflicts with sorting
submissionSet.normalizeSubmissions(normalizeOrder);
}
int submissionCount = submissionSet.numberOfSubmissions();
if (submissionCount < 2) {
Expand Down
7 changes: 4 additions & 3 deletions core/src/main/java/de/jplag/Submission.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.slf4j.LoggerFactory;

import de.jplag.exceptions.LanguageException;
import de.jplag.normalization.TokenStringNormalizer;
import de.jplag.normalization.TokenSequenceNormalizer;
import de.jplag.options.JPlagOptions;

/**
Expand Down Expand Up @@ -256,10 +256,11 @@ private static File createErrorDirectory(String... subdirectoryNames) {
/**
* Perform token sequence normalization, which makes the token sequence invariant to dead code insertion and independent
* statement reordering.
* @param sorting determines whether to perform topological sorting during normalization.
*/
void normalize() {
void normalize(boolean sorting) {
List<Integer> originalOrder = getOrder(tokenList);
tokenList = TokenStringNormalizer.normalize(tokenList);
tokenList = TokenSequenceNormalizer.normalize(tokenList, sorting);
List<Integer> normalizedOrder = getOrder(tokenList);

logger.debug("original line order: {}", originalOrder);
Expand Down
12 changes: 9 additions & 3 deletions core/src/main/java/de/jplag/SubmissionSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,17 @@ public List<Submission> getInvalidSubmissions() {
return invalidSubmissions;
}

public void normalizeSubmissions() {
/**
* Normalizes the token sequences of all submissions (including basecode). This makes the token sequence invariant to
* dead code insertion and independent statement reordering by removing dead tokens and optionally reordering tokens to
* a deterministic order.
* @param sorting determines whether to perform topological sorting during normalization.
*/
public void normalizeSubmissions(boolean sorting) {
if (baseCodeSubmission != null) {
baseCodeSubmission.normalize();
baseCodeSubmission.normalize(sorting);
}
ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, Submission::normalize);
ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, submission -> submission.normalize(sorting));
}

private List<Submission> filterValidSubmissions() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,34 @@
/**
* Performs token sequence normalization.
*/
public class TokenStringNormalizer {
public class TokenSequenceNormalizer {

private TokenStringNormalizer() {
private TokenSequenceNormalizer() {
}

/**
* Performs token sequence normalization. Tokens representing dead code have been eliminated and tokens representing
* subsequent independent statements have been put in a fixed order. Works by first constructing a Normalization Graph
* and then turning it back into a token sequence.
* subsequent independent statements have been put in a fixed order if sorting is true. Works by first constructing a
* Normalization Graph and then turning it back into a token sequence.
* @param tokens The original token sequence, remains unaltered.
* @return The normalized token sequence as unmodifiable list.
* @param sorting Boolean flag to control if the tokens should be topologically sorted.
* @return The normalized token sequence.
*/
public static List<Token> normalize(List<Token> tokens) {
public static List<Token> normalize(List<Token> tokens, boolean sorting) {
SimpleDirectedGraph<Statement, MultipleEdge> normalizationGraph = new NormalizationGraphConstructor(tokens).get();
propagateKeepStatus(normalizationGraph);
if (sorting) {
return normalizeWithSorting(tokens, normalizationGraph);
} else {
return normalizeWithoutSorting(normalizationGraph, tokens);
}

}

// Add tokens in normalized (topologically sorted) order, removing dead tokens
private static List<Token> normalizeWithSorting(List<Token> tokens, SimpleDirectedGraph<Statement, MultipleEdge> normalizationGraph) {
List<Token> normalizedTokens = new ArrayList<>(tokens.size());
spreadKeep(normalizationGraph);
PriorityQueue<Statement> roots = normalizationGraph.vertexSet().stream() //
.filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) //
PriorityQueue<Statement> roots = normalizationGraph.vertexSet().stream().filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v))
.collect(Collectors.toCollection(PriorityQueue::new));
while (!roots.isEmpty()) {
PriorityQueue<Statement> newRoots = new PriorityQueue<>();
Expand All @@ -51,13 +61,24 @@ public static List<Token> normalize(List<Token> tokens) {
} while (!roots.isEmpty());
roots = newRoots;
}
return Collections.unmodifiableList(normalizedTokens);
return normalizedTokens;
}

/**
 * Builds the normalized token sequence without reordering any statements. Walks the statements of the normalization
 * graph in the iteration order of its vertex set and collects the tokens of every statement whose semantics are marked
 * as keep; statements without keep status (dead code) are dropped.
 * NOTE(review): the result is only in the original order if vertexSet() iterates in statement insertion order - confirm
 * for the graph implementation in use.
 * @param normalizationGraph the normalization graph; the visible caller propagates the keep status before calling.
 * @param tokens the original token sequence; only its size is read, as the initial capacity of the result.
 * @return a new list containing the tokens of all kept statements.
 */
private static List<Token> normalizeWithoutSorting(SimpleDirectedGraph<Statement, MultipleEdge> normalizationGraph, List<Token> tokens) {
    List<Token> keptTokens = new ArrayList<>(tokens.size());
    normalizationGraph.vertexSet().stream() //
            .filter(vertex -> vertex.semantics().keep()) //
            .forEachOrdered(vertex -> keptTokens.addAll(vertex.tokens()));
    return keptTokens;
}

/**
* Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated.
*/
private static void spreadKeep(SimpleDirectedGraph<Statement, MultipleEdge> normalizationGraph) {
private static void propagateKeepStatus(SimpleDirectedGraph<Statement, MultipleEdge> normalizationGraph) {
Queue<Statement> visit = new LinkedList<>(normalizationGraph.vertexSet().stream() //
.filter(tl -> tl.semantics().keep()).toList());
while (!visit.isEmpty()) {
Expand Down
27 changes: 20 additions & 7 deletions core/src/test/java/de/jplag/NormalizationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,50 @@
import java.util.stream.Collectors;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import de.jplag.exceptions.ExitException;
import de.jplag.options.JPlagOptions;

class NormalizationTest extends TestBase {
private final Map<String, List<TokenType>> tokenStringMap;
private final List<TokenType> originalTokenString;
private Map<String, List<TokenType>> tokenStringMap;
private List<TokenType> originalTokenString;
private SubmissionSet submissionSet;

NormalizationTest() throws ExitException {
@BeforeEach
void setUp() throws ExitException {
JPlagOptions options = getDefaultOptions("normalization");
SubmissionSetBuilder builder = new SubmissionSetBuilder(options);
SubmissionSet submissionSet = builder.buildSubmissionSet();
submissionSet.normalizeSubmissions();
submissionSet = builder.buildSubmissionSet();

}

/**
 * Normalizes all submissions of the submission set and caches the resulting token type sequences for the assertions.
 * Afterwards, tokenStringMap maps each submission name to the token types of its token list, and originalTokenString
 * holds the sequence stored under "Squares.java".
 * @param sorting is passed through to the normalization of the submission set.
 */
private void normalizeSubmissions(boolean sorting) {
    submissionSet.normalizeSubmissions(sorting);

    // Reduce each submission to the types of its tokens.
    Function<Submission, List<TokenType>> toTokenTypes = it -> it.getTokenList().stream() //
            .map(Token::getType) //
            .toList();

    tokenStringMap = submissionSet.getSubmissions().stream() //
            .collect(Collectors.toMap(Submission::getName, toTokenTypes));
    originalTokenString = tokenStringMap.get("Squares.java");
}

@Test
void testInsertionNormalization() {
@ParameterizedTest
@ValueSource(booleans = {true, false})
void testInsertionNormalization(boolean sorting) {
normalizeSubmissions(sorting);
Assertions.assertIterableEquals(originalTokenString, tokenStringMap.get("SquaresInserted.java"));
}

/**
 * Checks that, with sorting enabled, the token types of "SquaresReordered.java" equal those of "Squares.java" after
 * normalization.
 */
@Test
void testReorderingNormalization() {
    normalizeSubmissions(true);
    List<TokenType> reorderedTokenString = tokenStringMap.get("SquaresReordered.java");
    Assertions.assertIterableEquals(originalTokenString, reorderedTokenString);
}

/**
 * Checks that, with sorting enabled, the token types of "SquaresInsertedReordered.java" equal those of "Squares.java"
 * after normalization.
 */
@Test
void testInsertionReorderingNormalization() {
    normalizeSubmissions(true);
    List<TokenType> insertedReorderedTokenString = tokenStringMap.get("SquaresInsertedReordered.java");
    Assertions.assertIterableEquals(originalTokenString, insertedReorderedTokenString);
}
}

0 comments on commit 0f73cc0

Please sign in to comment.