Skip to content

Commit

Permalink
Refactored PyPaYamlFile
Browse files Browse the repository at this point in the history
- Moved initialization into a static from method
  - This separates how the object is created from what medium it can be created from
- Changed get calls to getOrDefault calls
  - This allows a missing key to be replaced with a default value instead of causing a NullPointerException
- Removed affected and references
  - These are not used outside of parsing, so they should be removed
  - If they are needed in the future, we should parse them into proper objects, not the generic Object
- Added tests to support the refactor
  • Loading branch information
ctevse committed Sep 22, 2023
1 parent d685990 commit 22e63c7
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ private HashMap<String, RawVulnerability> extractCVEsFromVulns() {
for (File file : files ) {

try {
PyPAYamlFile parsedFile = new PyPAYamlFile(file);
PyPAYamlFile parsedFile = PyPAYamlFile.from(file);
ArrayList<String> cvesInFile = parsedFile.getCves();
for (String c : cvesInFile) {
vulnMap.put(c, (new RawVulnerability(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/
package edu.rit.se.nvip.crawler.github;

import lombok.Data;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.nd4j.shade.yaml.snakeyaml.Yaml;
Expand All @@ -33,59 +34,59 @@
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

@Data
public class PyPAYamlFile {

// PYSEC ID found at top of file
private String id;
private final String id;

// Vuln description
private String details;
private final String details;

// Publish date
private final String published;

// Last modified date
private final String modified;

// Affected Array of Obj
// TODO: proper access methods and types
private ArrayList<Object> affected;
//private final List<Object> affected;

// Array of { type: String, url: String } objects
// TODO: proper access methods
private ArrayList<LinkedHashMap<String, String>> references;
//private final ArrayList<LinkedHashMap<String, String>> references;

// Array of vuln aliases (CVE IDs located in here)
private ArrayList<String> aliases = new ArrayList<>();
private final List<String> aliases;

// Last modified date
private String modified;
private static final Logger logger = LogManager.getLogger(PyPAYamlFile.class.getSimpleName());

// Publish date
private String published;
/**
 * Static factory: loads the YAML file {@code f} and builds a {@code PyPAYamlFile} from it.
 *
 * <p>The {@code id}, {@code details}, {@code modified} and {@code published} keys are read
 * with an empty-string default; {@code aliases} becomes an empty list when the key yields
 * {@code null}.
 *
 * <p>NOTE(review): this span appears to be a diff view in which lines of the removed
 * constructor, logger field and getters are interleaved with the new method - confirm
 * against the actual file before relying on line order.
 *
 * @param f the YAML file to read
 * @return the populated object, or {@code null} when an {@code IOException} is caught
 *         (an error naming the file is logged in that case)
 */
public static PyPAYamlFile from(File f) {

private final Logger logger = LogManager.getLogger(getClass().getSimpleName());
public PyPAYamlFile(File f) {
Map<String, Object> data;
try {
// NOTE(review): no close() or try-with-resources is visible for this stream - looks like a leak; confirm.
InputStream inputStream = Files.newInputStream(f.toPath());
Yaml yaml = new Yaml();
Map<String, Object> data = yaml.load(inputStream);
this.id = data.get("id").toString();
this.details = data.get("details").toString();
this.affected = (ArrayList<Object>) data.get("affected");
this.references = (ArrayList<LinkedHashMap<String, String>>) data.get("references");
this.aliases = data.get("aliases") == null ? new ArrayList<>() : (ArrayList<String>) data.get("aliases");
this.modified = data.get("modified").toString();
this.published = data.get("published").toString();

data = yaml.load(inputStream);
} catch (IOException fe) {
logger.error("YAML Parser I/O exception for file: " + f.getName());
// Callers receive null here and must check for it before dereferencing the result.
return null;
}
}

public String getDetails() { return this.details; }

public String getModified() { return this.modified; }
// NOTE(review): getOrDefault only substitutes "" when the key is absent; a key that is present
// but mapped to null would presumably still throw on toString(), as would a null `data` - confirm.
String id = data.getOrDefault("id", "").toString();
String details = data.getOrDefault("details", "").toString();
String modified = data.getOrDefault("modified", "").toString();
String published = data.getOrDefault("published", "").toString();
// List<Object> affected = (ArrayList<Object>) data.get("affected");
// ArrayList<LinkedHashMap<String, String>> references = (ArrayList<LinkedHashMap<String, String>>) data.get("references");
// Unchecked cast: assumes the YAML "aliases" node is a list of strings - TODO confirm.
List<String> aliases = data.get("aliases") == null ? new ArrayList<>() : (ArrayList<String>) data.get("aliases");

public String getPublished() { return this.published; }

public String getId() { return this.id; }
// Argument order is id, details, published, modified, aliases.
return new PyPAYamlFile(id, details, published, modified, aliases);
}

/**
* access aliases and search for any alias that contains a CVE id
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package edu.rit.se.nvip.crawler.github;

import org.junit.jupiter.api.Test;

import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

import static org.junit.jupiter.api.Assertions.*;


/**
 * Tests for {@code PyPAYamlFile}: building instances from YAML fixture files through
 * {@code PyPAYamlFile.from(File)} and extracting CVE ids through {@code getCves()}.
 */
public class PyPaYamlFileTest {

    // Fixture directory, expressed relative to the working directory the tests run in.
    private final Path pypaResources = Paths.get("src", "test", "resources", "crawler", "github", "pypa");

    /** Resolves a fixture file name against the fixture directory. */
    private File fixture(String fileName) {
        return pypaResources.resolve(Paths.get(fileName)).toFile();
    }

    /** Builds an instance whose string fields are all empty and whose aliases are as given. */
    private static PyPAYamlFile withAliases(List<String> aliases) {
        return new PyPAYamlFile("", "", "", "", aliases);
    }

    /** Loading PYSEC-2023-173.yaml must yield every field, including both aliases. */
    @Test
    public void test_from_pysec_2023_173(){
        String details =
                "Piccolo is an ORM and query builder which supports asyncio. In versions 0.120.0" +
                " and prior, the implementation of `BaseUser.login` leaks enough information to a" +
                " malicious user such that they would be able to successfully generate a list of valid" +
                " users on the platform. As Piccolo on its own does not also enforce strong passwords," +
                " these lists of valid accounts are likely to be used in a password spray attack with" +
                " the outcome being attempted takeover of user accounts on the platform. The impact" +
                " of this vulnerability is minor as it requires chaining with other attack vectors" +
                " in order to gain more then simply a list of valid users on the underlying platform." +
                " The likelihood of this vulnerability is possible as it requires minimal skills to" +
                " pull off, especially given the underlying login functionality for Piccolo based" +
                " sites is open source. This issue has been patched in version 0.121.0.";
        List<String> aliases = List.of("CVE-2023-41885", "GHSA-h7cm-mrvq-wcfr");

        PyPAYamlFile wanted = new PyPAYamlFile(
                "PYSEC-2023-173",
                details,
                "Tue Sep 12 21:15:00 UTC 2023",
                "Tue Sep 19 05:26:00 UTC 2023",
                aliases
        );

        PyPAYamlFile loaded = PyPAYamlFile.from(fixture("PYSEC-2023-173.yaml"));

        assertEquals(wanted, loaded);
    }

    /**
     * Loading PYSEC-2023-174.yaml must yield an empty third constructor argument and an
     * empty alias list alongside the populated id, details and fourth argument.
     */
    @Test
    public void test_from_pysec_2023_174(){
        String details =
                "imagecodecs versions before v2023.9.18 bundled libwebp binaries in wheels" +
                " that are vulnerable to CVE-2023-4863. imagecodecs v2023.9.18 upgrades the bundled" +
                " libwebp binary to v1.3.2.";

        PyPAYamlFile wanted = new PyPAYamlFile(
                "PYSEC-2023-174",
                details,
                "",
                "Wed Sep 20 05:12:42 UTC 2023",
                List.of()
        );

        PyPAYamlFile loaded = PyPAYamlFile.from(fixture("PYSEC-2023-174.yaml"));

        assertEquals(wanted, loaded);
    }

    /** With no aliases at all, {@code getCves()} must produce an empty list. */
    @Test
    public void test_get_cves_with_no_cves_returns_empty_list(){
        PyPAYamlFile noAliases = withAliases(List.of());

        List<String> wanted = List.of();

        assertEquals(wanted, noAliases.getCves());
    }

    /** Aliases that are not CVE ids (here a GHSA id) must be left out of {@code getCves()}. */
    @Test
    public void test_get_cves_returns_only_cves(){
        PyPAYamlFile mixedAliases = withAliases(List.of("CVE-2023-41885", "GHSA-h7cm-mrvq-wcfr"));

        List<String> wanted = List.of("CVE-2023-41885");

        assertEquals(wanted, mixedAliases.getCves());
    }
}

0 comments on commit 22e63c7

Please sign in to comment.