Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
6c420c3
Add functionality to fetch external commits
Leo-Send Jul 21, 2025
f198a6e
Change 'extractHashtags' to ignore codeblocks
Leo-Send Jul 21, 2025
e537267
Document findings in 'README.md'
Leo-Send Jul 21, 2025
2f33e8b
Add Section to README.md
Leo-Send Aug 9, 2025
443dcdc
Fix spelling in 'README.md'
Leo-Send Aug 12, 2025
038bbf7
Update Copyright headers
Leo-Send Aug 26, 2025
55828ff
Rename field and setter for external commits
Leo-Send Oct 28, 2025
de8b9b7
Improve performance for extracting hashtags
Leo-Send Oct 31, 2025
526df21
Add state reason and type to issues data
shirazJafri Aug 1, 2025
560420f
Add event tracking for state changes
shirazJafri Aug 2, 2025
55b63d7
Add events tracking for issue type changed
shirazJafri Aug 2, 2025
a9395e4
Change change_type field to not appear in result
shirazJafri Aug 5, 2025
1d03eb8
Add events for parent and sub issues
shirazJafri Aug 6, 2025
ec47c5a
Add tracking for connected events
shirazJafri Aug 7, 2025
98e4e6d
Remove unnecessary map entry
Leo-Send Aug 26, 2025
b980f8d
Improve SHA-1 extraction from issue comments
Leo-Send Sep 25, 2025
6925ad8
Update copyright headers
Leo-Send Sep 25, 2025
1b6f8d5
Remove superfluous state reasons and events
Leo-Send Oct 14, 2025
efe5fec
Add exception if state reason is not a legal value
Leo-Send Oct 28, 2025
d3dd80e
Add functionality to detect suggestions
Leo-Send Oct 31, 2025
4371458
Resolve issues pointed out in review
Leo-Send Nov 4, 2025
e16d751
Add additional information to dummy user
Leo-Send Jan 27, 2026
0c0f8ae
Update copyright header
Leo-Send Feb 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,19 @@ repo.getIssues(false).ifPresent(issueData -> issueData.forEach(issue -> {
System.out.println(comment.user.username + ": " + comment.body));
}));
```

### Further data processing

The data extracted by this tool can be further processed, for example using the `run-issues.py` script from the tool [`codeface-extraction`](https://github.com/se-sic/codeface-extraction). This organizes and unifies the issue data into a single csv-like .list file. It also allows for synchronization with data from other data extraction tools, such as `codeface`.

### `referenced` events

`referenced` events are generated in an issue when a commit references that issue in its commit message. The intended behavior is that the event is present in the issue's event data and that the commit is also present in the issue's related commits. This does not work if the commit cannot be fetched. In this case, the event still exists, but it contains a link to a commit that the API cannot resolve, meaning that no data about the commit can be accessed.
Known causes of this include:

- a commit was rebased and changed/removed
- an external repository was deleted
- the commit's branch was deleted

Note that the commit might still be reachable until the automatic garbage collection has removed it from the remote repository.
In itself, this is not problematic. However, when further processing the data using `codeface-extraction`, this may lead to these `referenced` events being present in the final data, even though they should be filtered out as part of the issue processing.
51 changes: 51 additions & 0 deletions src/de/uni_passau/fim/gitwrapper/EventData.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/**
* Copyright (C) 2016-2018 Florian Heck
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -33,6 +35,7 @@ public abstract class EventData {
UserData user;
OffsetDateTime created_at;
String event;
Long id;

/**
* The User that created the Event.
Expand Down Expand Up @@ -193,4 +196,52 @@ public UserData getAssigner() {
return assigner;
}
}

/**
* An Event generated by changing the state of an issue.
*/
public class StateChangedEventData extends EventData {

// Filled in by EventDataProcessor.StateChangedEventProcessor after deserialization;
// stays null when the event carries no commit id or the commit cannot be resolved.
@Expose(deserialize = false)
Commit commit;
// Parsed from the "state_reason" JSON member via StateReason.getFromString.
StateReason state_reason;

/**
* The commit referenced by this state change.
*
* @return the referenced commit, or {@code null} if the event references no commit
*         or the commit could not be resolved
*/
public Commit getCommit() {
return commit;
}

/**
* The reason for the state change.
*
* @return the state reason as produced by {@code StateReason.getFromString}
*/
public StateReason getStateReason() {
return state_reason;
}
}

/**
* An Event generated by changing the type of an issue.
* <p>
* Used for the {@code issue_type_added}, {@code issue_type_changed} and
* {@code issue_type_removed} events. Declares no fields beyond those of {@link EventData}.
*/
public class IssueTypeChangedEventData extends EventData {
}

/**
* An Event generated by changing the parent issue of an issue.
* <p>
* Used for the {@code parent_issue_added} and {@code parent_issue_removed} events.
* Declares no fields beyond those of {@link EventData}.
*/
public class ParentIssueChangedEventData extends EventData {
}

/**
* An Event generated by changing the sub-issues of an issue.
* <p>
* Used for the {@code sub_issue_added} and {@code sub_issue_removed} events.
* Declares no fields beyond those of {@link EventData}.
*/
public class SubIssueChangedEventData extends EventData {
}

/**
* An Event generated by connecting to a repository.
* <p>
* Used for the {@code connected} event. Declares no fields beyond those of {@link EventData}.
* <p>
* NOTE(review): GitHub's event documentation describes {@code connected} as an issue or pull
* request being linked to another issue or pull request -- confirm the wording above.
*/
public class ConnectedEventData extends EventData {
}
}
93 changes: 90 additions & 3 deletions src/de/uni_passau/fim/gitwrapper/EventDataProcessor.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/**
* Copyright (C) 2016-2018 Florian Heck
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -43,7 +45,16 @@ class EventDataProcessor implements JsonDeserializer<EventData>, JsonSerializer<
map.put("unlabeled", EventData.LabeledEventData.class);
map.put("referenced", EventData.ReferencedEventData.class);
map.put("merged", EventData.ReferencedEventData.class);
map.put("closed", EventData.ReferencedEventData.class);
map.put("closed", EventData.StateChangedEventData.class);
map.put("reopened", EventData.StateChangedEventData.class);
map.put("connected", EventData.ConnectedEventData.class);
map.put("issue_type_added", EventData.IssueTypeChangedEventData.class);
map.put("issue_type_changed", EventData.IssueTypeChangedEventData.class);
map.put("issue_type_removed", EventData.IssueTypeChangedEventData.class);
map.put("parent_issue_added", EventData.ParentIssueChangedEventData.class);
map.put("parent_issue_removed", EventData.ParentIssueChangedEventData.class);
map.put("sub_issue_added", EventData.SubIssueChangedEventData.class);
map.put("sub_issue_removed", EventData.SubIssueChangedEventData.class);
map.put("review_requested", EventData.RequestedReviewEventData.class);
map.put("review_request_removed", EventData.RequestedReviewEventData.class);
map.put("review_dismissed", EventData.DismissedReviewEventData.class);
Expand Down Expand Up @@ -86,8 +97,12 @@ public void postDeserialize(EventData.ReferencedEventData result, JsonElement sr
}

result.commit = repo.getGithubCommit(hash.getAsString()).orElseGet(() -> {
LOG.warning("Found commit unknown to GitHub and local git repo: " + hash);
return null;
LOG.warning("Found commit unknown to GitHub and local git repo: " + hash + " Retry using url...");
JsonElement url = src.getAsJsonObject().get("commit_url");
return repo.getGithubCommitUrl(hash.getAsString(), url.getAsString()).orElseGet(() -> {
LOG.warning("Could not find commit: " + hash);
return null;
});
});
}

Expand Down Expand Up @@ -158,4 +173,76 @@ public void postDeserialize(EventData.AssignedEventData result, JsonElement src,
@Override
public void postSerialize(JsonElement result, EventData.AssignedEventData src, Gson gson) { }
}

/**
* Processor for state change events.
*/
static class StateChangedEventProcessor implements PostProcessor<EventData.StateChangedEventData> {

    private final GitHubRepository repo;

    /**
     * Creates a new StateChangedEventProcessor for the given repo.
     *
     * @param repo
     *         the repo used to resolve the commits referenced by state change events
     */
    StateChangedEventProcessor(GitHubRepository repo) {
        this.repo = repo;
    }

    /**
     * Fills in the state reason and, if the event references a commit, the commit itself.
     * <p>
     * The commit is first looked up by its hash; if that fails, the lookup is retried using the
     * {@code commit_url} member of the event. If both attempts fail, or the event has no usable
     * {@code commit_id} / {@code commit_url}, the commit is left as {@code null}.
     *
     * @param result
     *         the deserialized event to complete
     * @param src
     *         the JSON the event was deserialized from
     * @param gson
     *         the Gson instance (unused)
     */
    @Override
    public void postDeserialize(EventData.StateChangedEventData result, JsonElement src, Gson gson) {
        JsonElement stateReasonElement = src.getAsJsonObject().get("state_reason");
        String stateReasonValue = (stateReasonElement != null && !stateReasonElement.isJsonNull())
                ? stateReasonElement.getAsString()
                : null;
        result.state_reason = StateReason.getFromString(stateReasonValue);

        // 'get' returns null for an absent member, so guard against both a missing key and JSON null.
        JsonElement hash = src.getAsJsonObject().get("commit_id");
        if (hash == null || hash.isJsonNull()) {
            return;
        }

        result.commit = repo.getGithubCommit(hash.getAsString()).orElseGet(() -> {
            LOG.warning("Found commit unknown to GitHub and local git repo: " + hash + " Retry using url...");
            JsonElement url = src.getAsJsonObject().get("commit_url");
            if (url == null || url.isJsonNull()) {
                // Without a URL there is nothing left to retry with.
                LOG.warning("Could not find commit: " + hash);
                return null;
            }
            return repo.getGithubCommitUrl(hash.getAsString(), url.getAsString()).orElseGet(() -> {
                LOG.warning("Could not find commit: " + hash);
                return null;
            });
        });
    }

    /**
     * No post-processing is performed on serialization.
     */
    @Override
    public void postSerialize(JsonElement result, EventData.StateChangedEventData src, Gson gson) { }
}

/**
* Processor for issue type change events.
*/
static class IssueTypeChangedEventProcessor implements PostProcessor<EventData.IssueTypeChangedEventData> {

/**
* Does nothing: {@link EventData.IssueTypeChangedEventData} declares no fields of its own
* that would need to be filled in after deserialization.
*/
@Override
public void postDeserialize(EventData.IssueTypeChangedEventData result, JsonElement src, Gson gson) {
// intentionally empty
}

/**
* Does nothing: the serialized form is left untouched.
*/
@Override
public void postSerialize(JsonElement result, EventData.IssueTypeChangedEventData src, Gson gson) {
// intentionally empty
}
}

/**
* Processor for connected events.
*/
static class ConnectedEventProcessor implements PostProcessor<EventData.ConnectedEventData> {

/**
* Does nothing: {@link EventData.ConnectedEventData} declares no fields of its own
* that would need to be filled in after deserialization.
*/
@Override
public void postDeserialize(EventData.ConnectedEventData result, JsonElement src, Gson gson) {
// intentionally empty
}

/**
* Does nothing: the serialized form is left untouched.
*/
@Override
public void postSerialize(JsonElement result, EventData.ConnectedEventData src, Gson gson) {
// intentionally empty
}
}
}
21 changes: 21 additions & 0 deletions src/de/uni_passau/fim/gitwrapper/GitHubCommit.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
*
* This file is part of GitHubWrapper.
*
Expand All @@ -26,6 +27,7 @@ public class GitHubCommit extends Commit {
private String authorUsername;
private String committerUsername;
private boolean addedToPullRequest = false;
private boolean external = false;

/**
* Constructs a new {@link GitHubCommit} with the given <code>id</code> made in the <code>repo</code>.
Expand Down Expand Up @@ -119,4 +121,23 @@ public boolean isAddedToPullRequest() {
void setAddedToPullRequest(boolean added) {
this.addedToPullRequest = added;
}

/**
* Returns whether this commit is an external commit.
* <p>
* The flag is {@code false} by default and is set to {@code true} for commits that
* {@code GitHubRepository#getGithubCommitUrl} fetched via a commit URL.
*
* @return whether this commit is an external commit
*/
boolean isExternal() {
return this.external;
}

/**
* Sets whether this commit is an external commit.
*
* @param external
*         {@code true} if this commit is an external commit, {@code false} otherwise
*/
void setExternal(boolean external) {
this.external = external;
}

}
41 changes: 40 additions & 1 deletion src/de/uni_passau/fim/gitwrapper/GitHubRepository.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
* Copyright (C) 2016-2020 Florian Heck
* Copyright (C) 2018 Claus Hunsen
* Copyright (C) 2019-2021 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -241,6 +243,9 @@ public GitHubRepository(String url, File dir, GitWrapper git, List<String> oauth
gfb.registerPostProcessor(EventData.LabeledEventData.class, new EventDataProcessor.LabeledEventProcessor());
gfb.registerPostProcessor(EventData.DismissedReviewEventData.class, new EventDataProcessor.DismissedReviewEventProcessor());
gfb.registerPostProcessor(EventData.AssignedEventData.class, new EventDataProcessor.AssignedEventProcessor());
gfb.registerPostProcessor(EventData.StateChangedEventData.class, new EventDataProcessor.StateChangedEventProcessor(this));
gfb.registerPostProcessor(EventData.IssueTypeChangedEventData.class, new EventDataProcessor.IssueTypeChangedEventProcessor());
gfb.registerPostProcessor(EventData.ConnectedEventData.class, new EventDataProcessor.ConnectedEventProcessor());
gfb.registerPostProcessor(ReviewData.ReviewInitialCommentData.class, new ReviewDataProcessor.ReviewInitialCommentDataProcessor(this));
GsonBuilder gb = gfb.createGsonBuilder();
gb.registerTypeAdapter(Commit.class, new CommitProcessor(this, userProcessor));
Expand Down Expand Up @@ -352,6 +357,8 @@ public Optional<List<IssueData>> getIssues(boolean includePullRequests, OffsetDa
}
else timeLimit = "";
Type finalType = type;
// For debugging, you may add additional parameters to the string. For example, '/issues?creator=username&state=all'
// will fetch the issues created by the specified user and all related issues and commits.
getJSONStringFromPath("/issues?state=all" + timeLimit).map(json -> {
List<IssueData> data;
try {
Expand All @@ -367,7 +374,7 @@ public Optional<List<IssueData>> getIssues(boolean includePullRequests, OffsetDa
threadPool.submit(() -> data.parallelStream().forEach(IssueData::freeze));

} catch (JsonSyntaxException e) {
LOG.warning("Encountered invalid JSON: " + json);
LOG.warning("Encountered invalid JSON: " + json + "\n\n" + e.getMessage() + "\n\n" + e);
return null;
}
return data;
Expand Down Expand Up @@ -1028,6 +1035,38 @@ Optional<GitHubCommit> getGithubCommit(String hash) {
});
}

/**
 * Fetches the commit with the given hash directly from the given API URL and marks it as external.
 * <p>
 * In offline mode, the dummy commit is returned and nothing is fetched or cached. Otherwise the
 * result of the lookup (present or empty) is stored in {@code checkedHashes} under the given hash.
 * If the JSON returned for the URL cannot be parsed, the lookup is retried once with the content
 * of the {@code files} element removed.
 *
 * @param hash
 *         the hash of the commit, used as key for caching the result
 * @param url
 *         the URL to fetch the commit data from
 * @return optionally the commit, or an empty Optional if no data could be retrieved from the URL
 */
Optional<GitHubCommit> getGithubCommitUrl(String hash, String url) {
    if (offline.get()) {
        return Optional.of(getGHCommitUnchecked(DummyCommit.DUMMY_COMMIT_ID));
    }

    Optional<GitHubCommit> res;
    try {
        res = getJSONStringFromURL(url).map(commitInfo ->
                gson.fromJson(commitInfo, new TypeToken<GitHubCommit>() {}.getType()));
    } catch (JsonSyntaxException e) {
        /* For whatever reason, the JSON String is malformed, perhaps due to ill-encoded characters
         * in patches within the files element of the JSON String.
         * Due to that, get the JSON String again and remove the content of the files element of the
         * JSON String, as it is not needed for further processing.
         */
        LOG.info("Malformed JSON String when querying data for commit " + url + ". Neglect files element.");
        // The second request may come back empty; mapping over the Optional (instead of calling
        // get()) turns that into an empty result rather than a NoSuchElementException.
        res = getJSONStringFromURL(url)
                .map(commitInfo -> StringUtils.substringBefore(commitInfo, "\"files\":[") + "\"files\":[]}")
                .map(stripped -> gson.fromJson(stripped, new TypeToken<GitHubCommit>() {}.getType()));
    }

    checkedHashes.put(hash, res);
    res.ifPresent(commit -> commit.setExternal(true));
    return res;
}

/**
* Creates a new Commit with the given data, and tries to fill in the missing data from the local Repository
*
Expand Down
23 changes: 23 additions & 0 deletions src/de/uni_passau/fim/gitwrapper/IssueData.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/**
* Copyright (C) 2016-2018 Florian Heck
* Copyright (C) 2019 Thomas Bock
* Copyright (C) 2025 Leo Sendelbach
* Copyright (C) 2025 Shiraz Jafri
*
* This file is part of GitHubWrapper.
*
Expand Down Expand Up @@ -39,6 +41,7 @@ public class IssueData implements GitHubRepository.IssueDataCached {
UserData user;

@Expose(deserialize = false) State state;
@Expose(deserialize = false) TypeData type;
OffsetDateTime created_at;
@Nullable OffsetDateTime closed_at;

Expand All @@ -51,6 +54,7 @@ public class IssueData implements GitHubRepository.IssueDataCached {
private List<ReviewData> reviewsList;
private List<ReferencedLink<GitHubCommit>> relatedCommits;
List<ReferencedLink<Integer>> relatedIssues;
private List<Integer> subIssues;

transient GitHubRepository repo;
private transient boolean frozen;
Expand Down Expand Up @@ -100,6 +104,16 @@ void setRelatedCommits(List<ReferencedLink<GitHubCommit>> commits) {
relatedCommits = commits;
}

/**
* Sets the list of sub-issues of this Issue.
* <p>
* The given list is stored as is, without copying.
*
* @param issues
*         the issue numbers of the sub-issues
*/
void setSubIssues(List<Integer> issues) {
subIssues = issues;
}

/**
* Sets a list of related Issues (rather their numbers) to this Issue
* from links containing just issues numbers.
Expand Down Expand Up @@ -276,6 +290,15 @@ public List<ReferencedLink<GitHubCommit>> getRelatedCommits() {
return relatedCommits;
}

/**
* Gets a List of all sub-issues that belong to the Issue.
* <p>
* The internal list is returned directly, not a copy.
* NOTE(review): presumably {@code null} if no sub-issues were ever set -- verify against callers.
*
* @return a List of sub-issues in form of a list containing their issue numbers
*/
public List<Integer> getSubIssues() {
return subIssues;
}

/**
* Gets a List of all Issues referenced in the Issue and its Comments.
*
Expand Down
Loading