Skip to content

Commit

Permalink
Added duplication identification logic. Improved error identification.
Browse files Browse the repository at this point in the history
  • Loading branch information
bbchristians committed May 22, 2020
1 parent 797a71c commit 23dc7bb
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 31 deletions.
10 changes: 6 additions & 4 deletions src/main/java/edu/rit/se/git/RepositoryCommitReference.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

/**
Expand All @@ -43,6 +40,11 @@ public class RepositoryCommitReference {
* @return A list of the commit's parents
*/
public List<RepositoryCommitReference> getParentCommitReferences() {
// Debugging code -- should NOT be included in any releases.
// Used to start a search at a specific commit
// if( this.commit.getName().equals("bff409f2b8a090d8103e85cdbe4643a99d3b44f9") ) {
// return new ArrayList<>();
// }
final RevWalk rw = new RevWalk(this.gitInstance.getRepository());
return Arrays.stream(this.commit.getParents())
.map(RevCommit::toObjectId)
Expand Down
13 changes: 7 additions & 6 deletions src/main/java/edu/rit/se/satd/SATDMiner.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,22 +179,23 @@ private List<DiffPair> getAllDiffPairs(RepositoryCommitReference curRef) {
* in this project
* @param diff a SATDDifference object
* @return the SATDDifference object
* TODO what can be done about instances with the same text, in the same method and class??
* Entries like this will have a new satdInstanceId generated
*/
private SATDDifference mapSATDInstanceLikeness(SATDDifference diff) {
diff.getSatdInstances().stream()
.distinct()
diff.getSatdInstances()
.forEach(satdInstance -> {
switch (satdInstance.getResolution()) {
case SATD_ADDED:
// SATD was added, so we know it wont relate to other instances
// SATD was added, so we know it won't relate to other instances
// It could possibly be duplicated from another instance, but detecting
// that is currently out of scope for this tool.
if( !this.satdInstanceMappings.containsKey(satdInstance.getNewInstance()) ) {
this.satdInstanceMappings.put(satdInstance.getNewInstance(), this.getNewSATDId());
} else {
System.err.println("here!");
if( isErrorOutputEnabled() ) {
System.err.println("\nMultiple SATD_ADDED instances for " +
satdInstance.getOldInstance().toString());
}
this.status.addErrorEncountered();
}
satdInstance.setId(this.satdInstanceMappings.get(satdInstance.getNewInstance()));
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;

@RequiredArgsConstructor
public class OldToNewCommentMapping {
Expand All @@ -15,6 +16,10 @@ public class OldToNewCommentMapping {
@Getter
private boolean isMapped = false;

@Getter
@Setter
private int duplicationId = 0;

public void mapTo(OldToNewCommentMapping other) {
this.isMapped = true;
if (other != null) {
Expand All @@ -30,7 +35,14 @@ public boolean commentsMatch(OldToNewCommentMapping other) {
return this.comment.getComment().equals(other.comment.getComment())
&& this.comment.getContainingMethod().equals(other.comment.getContainingMethod())
&& this.comment.getContainingClass().equals(other.comment.getContainingClass())
&& this.file.equals(other.file);
&& this.file.equals(other.file)
&& this.duplicationId == other.duplicationId;
}

/**
 * Computes a hash code as the sum of the comment's hash code, the file's
 * hash code and the current duplication id.
 *
 * NOTE: duplicationId has a setter, so the value returned here changes
 * whenever the duplication id is reassigned.
 *
 * @return the combined hash code of this mapping
 */
@Override
public int hashCode() {
    int hash = this.comment.hashCode();
    hash += this.file.hashCode();
    hash += this.duplicationId;
    return hash;
}
}
29 changes: 21 additions & 8 deletions src/main/java/edu/rit/se/satd/mining/RepositoryDiffMiner.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@
import lombok.NonNull;
import lombok.RequiredArgsConstructor;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -61,10 +58,12 @@ public SATDDifference mineDiff() {
.flatMap(oldFile -> olderSATD.get(oldFile).getComments().stream()
.map(comment -> new OldToNewCommentMapping(comment, oldFile)))
.collect(Collectors.toList());
populateDuplicationIds(oldSATDMappings);
final List<OldToNewCommentMapping> newSATDMappings = newerSATD.keySet().stream()
.flatMap(newFile -> newerSATD.get(newFile).getComments().stream()
.map(comment -> new OldToNewCommentMapping(comment, newFile)))
.collect(Collectors.toList());
populateDuplicationIds(newSATDMappings);
final List<String> erroredFiles = new ArrayList<>();
// Add errored files to known errors
erroredFiles.addAll(newerSATD.values().stream()
Expand Down Expand Up @@ -120,13 +119,27 @@ private static List<SATDInstance> mineDiffsFromMappedSATDInstances(CommitToCommi
boolean isOld) {
return mappings.stream()
.filter(OldToNewCommentMapping::isNotMapped)
.map(mapping -> isOld ?
cToCDiff.loadDiffsForOldFile(mapping.getFile(), mapping.getComment()) :
cToCDiff.loadDiffsForNewFile(mapping.getFile(), mapping.getComment()))
.flatMap(Collection::stream)
.flatMap(mapping -> {
final List<SATDInstance> minedInstances = (isOld ?
cToCDiff.loadDiffsForOldFile(mapping.getFile(), mapping.getComment()) :
cToCDiff.loadDiffsForNewFile(mapping.getFile(), mapping.getComment()));
return minedInstances.stream()
.peek(a -> a.setDuplicationId(mapping.getDuplicationId()));
})
.collect(Collectors.toList());
}

/**
 * Assigns a duplication id to every mapping in the given list, in list order.
 * A mapping receives the number of earlier entries in the list that the
 * counting map treats as the same key (per the mapping's equals/hashCode),
 * so the first occurrence gets 0, the next one 1, and so on.
 *
 * @param mappingList the mappings to number; each element is modified in place
 */
private static void populateDuplicationIds(List<OldToNewCommentMapping> mappingList) {
    // Next duplication id to hand out for each distinct mapping key
    final Map<OldToNewCommentMapping, Integer> nextDupIds = new HashMap<>();
    for (final OldToNewCommentMapping mapping : mappingList) {
        final int dupId = nextDupIds.getOrDefault(mapping, 0);
        // Update the counter BEFORE mutating the mapping: the mapping's hash code
        // takes its duplication id into account, so looking it up again after
        // setDuplicationId() could miss the entry and yield null.
        nextDupIds.put(mapping, dupId + 1);
        mapping.setDuplicationId(dupId);
    }
}

/**
 * Returns the string identifying this diff, which is the commit hash
 * reported by the second repository reference.
 *
 * @return the commit hash of secondRepo
 */
public String getDiffString() {
    final String commitHash = this.secondRepo.getCommitHash();
    return commitHash;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ public List<SATDInstance> loadDiffsForNewFile(String newFile, GroupedComment com
.collect(Collectors.toList());
}

// TODO Can the SATD ID be set here?
private List<SATDInstance> getSATDFromDiffOldFile(DiffEntry diffEntry, GroupedComment oldComment) {
final List<SATDInstance> satd = new ArrayList<>();

Expand All @@ -86,8 +85,6 @@ private List<SATDInstance> getSATDFromDiffOldFile(DiffEntry diffEntry, GroupedCo
this.getCommentsInFileInNewRepository(diffEntry.getNewPath());
final GroupedComment newComment = comInNewRepository.getComments().stream()
.filter(nc -> nc.getComment().equals(oldComment.getComment()))
// TODO how do we account for multiple SATD Instances in the same file with identical comments
// In the same class and method?
.filter(nc -> nc.getContainingMethod().equals(oldComment.getContainingMethod()))
.findFirst()
.orElse(new NullGroupedComment());
Expand Down Expand Up @@ -133,7 +130,6 @@ private List<SATDInstance> getSATDFromDiffOldFile(DiffEntry diffEntry, GroupedCo
SATDInstance.SATDResolution.SATD_REMOVED
)
);
return satd;
}
// If an updated comment was found, and it is not identical to the old comment
if( !updatedComments.isEmpty() &&
Expand Down Expand Up @@ -161,9 +157,7 @@ private List<SATDInstance> getSATDFromDiffOldFile(DiffEntry diffEntry, GroupedCo
})
.collect(Collectors.toList())
);
return satd;
}
// If the comment was updated and they are identical to the old comment
if(oldComment.getContainingMethod().equals(NULL_FIELD) ||
oldComment.getContainingClass().equals(NULL_FIELD) ||
editsTouchedClassOrMethodSignatureOldComment(editsToFile, oldComment)) {
Expand Down
11 changes: 9 additions & 2 deletions src/main/java/edu/rit/se/satd/model/SATDInstance.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ public class SATDInstance {
@Getter
@Setter
private int id = -1;
// SATD Duplication ID to differentiate instances if they
// align in all other ways
@Getter
@Setter
private int duplicationId = 0;

public int getStartLineNumberOldFile() {
return this.oldInstance.getComment().getStartLine();
Expand Down Expand Up @@ -56,7 +61,8 @@ public boolean equals(Object obj) {
if( obj instanceof SATDInstance ) {
return this.oldInstance.equals(((SATDInstance) obj).oldInstance) &&
this.newInstance.equals(((SATDInstance) obj).newInstance) &&
this.resolution.equals(((SATDInstance) obj).resolution);
this.resolution.equals(((SATDInstance) obj).resolution) &&
this.duplicationId == ((SATDInstance) obj).duplicationId;
}
return false;
}
Expand All @@ -65,6 +71,7 @@ public boolean equals(Object obj) {
public int hashCode() {
return this.oldInstance.hashCode() +
this.newInstance.hashCode() +
this.resolution.hashCode();
this.resolution.hashCode() +
this.duplicationId;
}
}
9 changes: 5 additions & 4 deletions src/main/java/edu/rit/se/satd/writer/MySQLOutputWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.sql.*;
import java.util.Calendar;
import java.util.Properties;

public class MySQLOutputWriter implements OutputWriter {
Expand Down Expand Up @@ -220,14 +221,14 @@ private String getCommitId(Connection conn, CommitMetaData commitMetaData, int p
updateStmt.setString(4, commitMetaData.getCommitterName()); // committer_name
updateStmt.setString(5, commitMetaData.getCommitterEmail()); // committer_email
if( commitMetaData.getAuthorDate() != null ) {
updateStmt.setDate(6, new Date(commitMetaData.getAuthorDate().getTime())); // author_date
updateStmt.setTimestamp(6, new Timestamp(commitMetaData.getAuthorDate().getTime()), Calendar.getInstance()); // author_date
} else {
updateStmt.setDate(6, null);
updateStmt.setTimestamp(6, null);
}
if( commitMetaData.getCommitDate() != null ) {
updateStmt.setDate(7, new Date(commitMetaData.getCommitDate().getTime())); // commit_date
updateStmt.setTimestamp(7, new Timestamp(commitMetaData.getCommitDate().getTime())); // commit_date
} else {
updateStmt.setDate(7, null);
updateStmt.setTimestamp(7, null);
}
updateStmt.setInt(8, projectId);
updateStmt.executeUpdate();
Expand Down

0 comments on commit 23dc7bb

Please sign in to comment.