diff --git a/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAGithubScraper.java b/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAGithubScraper.java
index 6d1e8214c..e98e2ae5f 100644
--- a/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAGithubScraper.java
+++ b/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAGithubScraper.java
@@ -80,7 +80,11 @@ private HashMap extractCVEsFromVulns() {

         for (File file : files ) {
             try {
-                PyPAYamlFile parsedFile = new PyPAYamlFile(file);
+                PyPAYamlFile parsedFile = PyPAYamlFile.from(file);
+                // from() returns null when the file cannot be read; skip it.
+                if (parsedFile == null) {
+                    continue;
+                }
                 ArrayList cvesInFile = parsedFile.getCves();
                 for (String c : cvesInFile) {
                     vulnMap.put(c, (new RawVulnerability(
diff --git a/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAYamlFile.java b/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAYamlFile.java
index f1acd3288..8ea8e2418 100644
--- a/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAYamlFile.java
+++ b/crawler/src/main/java/edu/rit/se/nvip/crawler/github/PyPAYamlFile.java
@@ -23,6 +23,7 @@
  */
 package edu.rit.se.nvip.crawler.github;

+import lombok.Data;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.nd4j.shade.yaml.snakeyaml.Yaml;
@@ -33,59 +34,93 @@
 import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;

+/**
+ * Immutable view of a single PyPA advisory YAML file.
+ *
+ * <p>Lombok's {@code @Data} generates the constructor over the final fields (in declaration
+ * order), the getters and {@code equals}/{@code hashCode}.
+ */
+@Data
 public class PyPAYamlFile {

     // PYSEC ID found at top of file
-    private String id;
+    private final String id;

     // Vuln description
-    private String details;
+    private final String details;
+
+    // Publish date
+    private final String published;
+
+    // Last modified date
+    private final String modified;

     // Affected Array of Obj
     // TODO: proper access methods and types
-    private ArrayList affected;

     // Array of { type: String, url: String } objects
     // TODO: proper access methods
-    private ArrayList> references;

     // Array of vuln aliases (CVE IDs located in here)
-    private ArrayList aliases = new ArrayList<>();
+    private final List<String> aliases;

-    // Last modified date
-    private String modified;
+    private static final Logger logger = LogManager.getLogger(PyPAYamlFile.class.getSimpleName());

-    // Publish date
-    private String published;
+    /**
+     * Parses a PyPA advisory YAML file into a {@link PyPAYamlFile}.
+     *
+     * <p>Missing or null {@code id}, {@code details}, {@code modified} and {@code published}
+     * entries become empty strings and a missing {@code aliases} entry becomes an empty list.
+     * Present values are stored as the {@code toString()} of the object the YAML parser produced.
+     *
+     * @param f the YAML file to read
+     * @return the parsed file, or {@code null} if {@code f} could not be read
+     */
+    public static PyPAYamlFile from(File f) {

-    private final Logger logger = LogManager.getLogger(getClass().getSimpleName());
-    public PyPAYamlFile(File f) {
-        try {
-            InputStream inputStream = Files.newInputStream(f.toPath());
+        Map<String, Object> data;
+        // try-with-resources so the stream is closed even when parsing throws
+        try (InputStream inputStream = Files.newInputStream(f.toPath())) {
             Yaml yaml = new Yaml();
-            Map data = yaml.load(inputStream);
-            this.id = data.get("id").toString();
-            this.details = data.get("details").toString();
-            this.affected = (ArrayList) data.get("affected");
-            this.references = (ArrayList>) data.get("references");
-            this.aliases = data.get("aliases") == null ? new ArrayList<>() : (ArrayList) data.get("aliases");
-            this.modified = data.get("modified").toString();
-            this.published = data.get("published").toString();
-
+            data = yaml.load(inputStream);
         } catch (IOException fe) {
-            logger.error("YAML Parser I/O exception for file: " + f.getName());
+            logger.error("YAML Parser I/O exception for file: " + f.getName(), fe);
+            return null;
         }
-    }
-
-    public String getDetails() { return this.details; }

-    public String getModified() { return this.modified; }
+        if (data == null) {
+            // An empty YAML document loads as null; treat it as a file with no fields.
+            data = new LinkedHashMap<>();
+        }
+
+        String id = stringOrEmpty(data, "id");
+        String details = stringOrEmpty(data, "details");
+        String modified = stringOrEmpty(data, "modified");
+        String published = stringOrEmpty(data, "published");
+
+        // Copy the aliases instead of casting so a non-list or non-string value cannot
+        // surface later as a ClassCastException.
+        List<String> aliases = new ArrayList<>();
+        Object rawAliases = data.get("aliases");
+        if (rawAliases instanceof List) {
+            for (Object alias : (List<?>) rawAliases) {
+                if (alias != null) {
+                    aliases.add(alias.toString());
+                }
+            }
+        }

-    public String getPublished() { return this.published; }
-
-    public String getId() { return this.id; }
+        return new PyPAYamlFile(id, details, published, modified, aliases);
+    }
+
+    /** Returns {@code data.get(key).toString()}, or an empty string when the value is absent or null. */
+    private static String stringOrEmpty(Map<String, Object> data, String key) {
+        Object value = data.get(key);
+        return value == null ? "" : value.toString();
+    }

     /**
      * access aliases and search for any alias that contains a CVE id
diff --git a/crawler/src/test/java/edu/rit/se/nvip/crawler/github/PyPaYamlFileTest.java b/crawler/src/test/java/edu/rit/se/nvip/crawler/github/PyPaYamlFileTest.java
new file mode 100644
index 000000000..6709807d4
--- /dev/null
+++ b/crawler/src/test/java/edu/rit/se/nvip/crawler/github/PyPaYamlFileTest.java
@@ -0,0 +1,100 @@
+package edu.rit.se.nvip.crawler.github;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+
+public class PyPaYamlFileTest {
+
+    private final Path pypaResources = Paths.get("src", "test", "resources", "crawler", "github", "pypa");
+
+    @Test
+    public void test_from_pysec_2023_173(){
+        PyPAYamlFile expected = new PyPAYamlFile(
+                "PYSEC-2023-173",
+                "Piccolo is an ORM and query builder which supports asyncio. In versions 0.120.0"
+                        + " and prior, the implementation of `BaseUser.login` leaks enough information to a"
+                        + " malicious user such that they would be able to successfully generate a list of valid"
+                        + " users on the platform. As Piccolo on its own does not also enforce strong passwords,"
+                        + " these lists of valid accounts are likely to be used in a password spray attack with"
+                        + " the outcome being attempted takeover of user accounts on the platform. The impact"
+                        + " of this vulnerability is minor as it requires chaining with other attack vectors"
+                        + " in order to gain more then simply a list of valid users on the underlying platform."
+                        + " The likelihood of this vulnerability is possible as it requires minimal skills to"
+                        + " pull off, especially given the underlying login functionality for Piccolo based"
+                        + " sites is open source. This issue has been patched in version 0.121.0.",
+                // NOTE(review): these look like java.util.Date#toString values, which depend on the
+                // JVM default time zone - confirm the test passes when the zone is not UTC.
+                "Tue Sep 12 21:15:00 UTC 2023",
+                "Tue Sep 19 05:26:00 UTC 2023",
+                List.of("CVE-2023-41885", "GHSA-h7cm-mrvq-wcfr")
+        );
+
+        File pysec173Yaml = pypaResources.resolve(Paths.get("PYSEC-2023-173.yaml")).toFile();
+        PyPAYamlFile actual = PyPAYamlFile.from(pysec173Yaml);
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void test_from_pysec_2023_174(){
+        PyPAYamlFile expected = new PyPAYamlFile(
+                "PYSEC-2023-174",
+                "imagecodecs versions before v2023.9.18 bundled libwebp binaries in wheels"
+                        + " that are vulnerable to CVE-2023-4863. imagecodecs v2023.9.18 upgrades the bundled"
+                        + " libwebp binary to v1.3.2.",
+                "",
+                "Wed Sep 20 05:12:42 UTC 2023",
+                List.of()
+        );
+
+        File pysec174Yaml = pypaResources.resolve(Paths.get("PYSEC-2023-174.yaml")).toFile();
+
+        PyPAYamlFile actual = PyPAYamlFile.from(pysec174Yaml);
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void test_from_missing_file_returns_null(){
+        File missingYaml = pypaResources.resolve(Paths.get("does-not-exist.yaml")).toFile();
+
+        assertNull(PyPAYamlFile.from(missingYaml));
+    }
+
+    @Test
+    public void test_get_cves_with_no_cves_returns_empty_list(){
+        List<String> expected = List.of();
+
+        PyPAYamlFile pyPaFile = new PyPAYamlFile(
+                "",
+                "",
+                "",
+                "",
+                List.of()
+        );
+
+        assertEquals(expected, pyPaFile.getCves());
+    }
+
+    @Test
+    public void test_get_cves_returns_only_cves(){
+        List<String> expected = List.of("CVE-2023-41885");
+
+        PyPAYamlFile pyPaFile = new PyPAYamlFile(
+                "",
+                "",
+                "",
+                "",
+                List.of("CVE-2023-41885", "GHSA-h7cm-mrvq-wcfr")
+        );
+
+        assertEquals(expected, pyPaFile.getCves());
+    }
+}