Skip to content

Commit

Permalink
Merge pull request #8 from sansecio/wdg/vendor
Browse files Browse the repository at this point in the history
Convert indexing from MD5 to CRC32
  • Loading branch information
gwillem authored Mar 31, 2023
2 parents 230d829 + 4b712f9 commit 5bf6450
Show file tree
Hide file tree
Showing 23 changed files with 218 additions and 152 deletions.
11 changes: 11 additions & 0 deletions .composer/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"config": {
"allow-plugins": true
},
"repositories": {
"0": {
"type": "composer",
"url": "https://repo.magento.com/"
}
}
}
2 changes: 2 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export COMPOSER_HOME=$PWD/.composer
export COMPOSER_IGNORE_PLATFORM_REQS=1
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
!.composer/config.json
.composer/*
.vscode
*.bin
/db
/build/*
22 changes: 13 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

![](https://buq.eu/screenshots/6595XfnX5wwUPzbFQGkU0GgN.png)

A forensic tool to quickly find unauthorized modifications in a Magento 1 or 2 code base. Corediff compares each line of code with a database of 1.7M legitimate code hashes and shows you the lines that have not been seen before. A bit like [@NYT_first_said](https://maxbittker.github.io/clear-pipes/).
A forensic tool to quickly find unauthorized modifications in an open source code base, such as Magento. Corediff compares each line of code with a database of 1.7M legitimate code hashes and shows you the lines that have not been seen before. A bit like [@NYT_first_said](https://maxbittker.github.io/clear-pipes/).

> _"Corediff saved us countless hours"_
Expand Down Expand Up @@ -41,25 +41,29 @@ Use our binary package (available for Linux & Mac, arm64 & amd64)
osarch=$(uname -sm | tr 'LD ' 'ld-')
curl https://sansec.io/downloads/$osarch/corediff -O
chmod 755 corediff
./corediff <magento_path> | less -SR
./corediff <store-path> | less -SR
```

Or compile from source (requires Go 1.13+):
Or compile from source (requires a recent Go version):

```sh
git clone https://github.com/sansecio/magento-corediff.git
cd magento-corediff
go run . <magento_path>
git clone https://github.com/sansecio/corediff.git
cd corediff
go run . <store-path>
```

At the first run, `corediff` will automatically download the Sansec hash database (~26MB).
On the first run, `corediff` will automatically download the Sansec hash database.

# Community contributed datasets

[@fros_it](https://twitter.com/fros_it) has kindly contributed hashes for his collection of Magento Connect extensions, including all available historical copies. Download the [extension hash database](https://api.sansec.io/downloads/corediff-db/m1ext.db) here (62MB) and use it like this:
[@fros_it](https://twitter.com/fros_it) has kindly contributed hashes for his collection of Magento Connect extensions, including all available historical copies. Download the [extension hash database](https://sansec.io/downloads/corediff-db/m1ext.db) here (62MB) and use it like this:

![](https://buq.eu/screenshots/RXdQ1Mmg5KliivMtK6DlHTcP.png)

# Todo

- [ ] Compress the hash db? E.g. https://github.com/Smerity/govarint, https://github.com/bits-and-blooms/bloom

# Contributing

Adding or maintaining hashes?
Expand All @@ -80,4 +84,4 @@ Contributions welcome! Naturally, we only accept hashes from trusted sources. [C

Sansec's flagship software [eComscan](https://sansec.io/?corediff) is used by ecommerce agencies, law enforcement and PCI forensic investigators. We are proud to open source many of our internal tools and hope that it will benefit our partners and customers. Malware contributions welcome.

(C) 2022 [Sansec BV](https://sansec.io/?corediff) // info@sansec.io
(C) 2023 [Sansec BV](https://sansec.io/?corediff) // info@sansec.io
49 changes: 27 additions & 22 deletions corediff.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,48 @@ package main

import (
"bufio"
"encoding/binary"
"fmt"
"io"
"log"
"os"
"path/filepath"
)

func loadDB(path string) hashDB {
var placeholder = struct{}{}

func loadDB(path string) hashDB {
m := make(hashDB)

f, err := os.Open(path)
if os.IsNotExist(err) {
return m
} else if err != nil {
log.Fatal(err)
}
check(err)
defer f.Close()
reader := bufio.NewReader(f)
for {
b := make([]byte, 16)
n, err := reader.Read(b)
if n == 0 {
var b uint32
err = binary.Read(reader, binary.LittleEndian, &b)
if err == io.EOF {
break
} else if err != nil {
log.Fatal(err)
}
check(err)
var b2 [16]byte
copy(b2[:], b) // need to convert to array first
m[b2] = true
m[b] = placeholder
}
return m
}

func saveDB(path string, db hashDB) {
f, err := os.Create(path)
if err != nil {
log.Fatal(err)
}
defer f.Close()
check(err)
for k := range db {
n, err := f.Write(k[:])
check(err)
if n != 16 {
log.Fatal("Wrote unexpected number of bytes?")
if err := binary.Write(f, binary.LittleEndian, k); err != nil {
log.Fatal(err)
}
}
}
Expand All @@ -60,10 +62,10 @@ func parseFile(path, relPath string, db hashDB, updateDB bool) (hits []int, line
copy(l, x)
lines = append(lines, l)
h := hash(normalizeLine(l))
if !db[h] {
if _, ok := db[h]; !ok {
hits = append(hits, i)
if updateDB {
db[h] = true
db[h] = placeholder
}
}
}
Expand Down Expand Up @@ -103,7 +105,11 @@ func checkPath(root string, db hashDB, args *baseArgs) *walkStats {

// Only do path checking for non-root elts
if path != root && !args.IgnorePaths {
if !db[pathHash(relPath)] {

_, foundInDb := db[pathHash(relPath)]
shouldExclude := pathIsExcluded(relPath)

if !foundInDb || shouldExclude {
stats.filesCustomCode++
logVerbose(grey(" ? ", relPath))
return nil
Expand Down Expand Up @@ -163,7 +169,7 @@ func addPath(root string, db hashDB, args *baseArgs) {
// If relPath has valid ext, add hash of "path:<relPath>" to db
// Never add root path (possibly file)
if !args.IgnorePaths && path != root && !pathIsExcluded(relPath) {
db[pathHash(relPath)] = true
db[pathHash(relPath)] = placeholder
}

hits, _ := parseFile(path, relPath, db, true)
Expand All @@ -184,15 +190,15 @@ func main() {
args := setup()
db := loadDB(args.Database)

logInfo(boldwhite("\nMagento Corediff loaded ", len(db), " precomputed hashes. (C) 2020-2022 labs@sansec.io"))
logInfo(boldwhite("Corediff loaded ", len(db), " precomputed hashes. (C) 2020-2023 labs@sansec.io"))
logInfo("Using database:", args.Database, "\n")

if args.Merge {
for _, p := range args.Path.Path {
db2 := loadDB(p)
logInfo("Merging", filepath.Base(p), "with", len(db2), "entries ..")
for k := range db2 {
db[k] = true
db[k] = placeholder
}
}
logInfo("Saving", args.Database, "with a total of", len(db), "entries.")
Expand Down Expand Up @@ -221,5 +227,4 @@ func main() {
logInfo(" - Files without code :", stats.filesNoCode)
}
}
logInfo()
}
55 changes: 18 additions & 37 deletions corediff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,50 @@ import (
"fmt"
"log"
"os"
"reflect"
"testing"

"github.com/stretchr/testify/assert"
)

// digest renders a hash value as lowercase hexadecimal without zero padding,
// so that tests can compare it against a readable string literal.
func digest(b uint32) string {
	return fmt.Sprintf("%x", b)
}

func Test_parseFile(t *testing.T) {
hits, lines := parseFile("fixture/odd-encoding.js", "n/a", hashDB{}, false)
fmt.Println("succeeded", len(hits), len(lines))
hdb := hashDB{}
updateDB := true
hits, lines := parseFile("fixture/docroot/odd-encoding.js", "n/a", hdb, updateDB)
assert.Equal(t, 220, len(hdb))
assert.Equal(t, 220, len(hits))
assert.Equal(t, 471, len(lines))
}

func Test_hash(t *testing.T) {
tests := []struct {
args []byte
want string
}{
{
[]byte("banaan"),
"31d674be46e1ba6b54388a671c09accb",
},
{[]byte("banaan"), "14ac6691"},
}
for _, tt := range tests {
t.Run(string(tt.args), func(t *testing.T) {
if got := digest(hash(tt.args)); !reflect.DeepEqual(got, tt.want) {
if got := digest(hash(tt.args)); got != tt.want {
t.Errorf("hash() = %x (%v), want %x", got, got, tt.want)
}
})
}
}

// Test_vendor_bug loads the sample database fixture and checks both its size
// and the presence of one specific hash.
func Test_vendor_bug(t *testing.T) {
	db := loadDB("fixture/sample.db")
	assert.Len(t, db, 238)
	wantHash := uint32(3333369281)
	if _, ok := db[wantHash]; !ok {
		t.Error("hash not in db")
	}
}
func Test_Corruption(t *testing.T) {
fh, _ := os.Open("fixture/sample")
fh, _ := os.Open("fixture/docroot/sample")
defer fh.Close()

lines := [][]byte{}
Expand All @@ -54,33 +57,11 @@ func Test_Corruption(t *testing.T) {
for scanner.Scan() {
x := scanner.Bytes()
l := make([]byte, len(x))
// Need to copy, underlying Scan array may change later
copy(l, x)
fmt.Printf("%s\n", l)
lines = append(lines, l)
}
if err := scanner.Err(); err != nil {
log.Fatal(err)
}

fmt.Println("Scanning completed, lines:", len(lines))

for _, l := range lines {
fmt.Printf("%s\n", l)
}
}

func Test_NoFileSource(t *testing.T) {
lines := [][]byte{}

for i := 0; i < 70; i++ {
line := fmt.Sprintf("LINE %3d =======================================================", i)
lines = append(lines, []byte(line))
}

fmt.Println("Scanning completed, lines:", len(lines))

for _, l := range lines {
fmt.Printf("%s\n", l)
}
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added fixture/sample.db
Binary file not shown.
7 changes: 6 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
module github.com/gwillem/magento-corediff

go 1.17
go 1.20

require (
github.com/cespare/xxhash/v2 v2.2.0
github.com/fatih/color v1.12.0
github.com/gobwas/glob v0.2.3
github.com/gwillem/urlfilecache v0.0.0-20201121123616-8e4f7e58333d
github.com/jessevdk/go-flags v1.5.0
github.com/stretchr/testify v1.6.1
)

require (
github.com/adrg/xdg v0.2.3 // indirect
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/mattn/go-colorable v0.1.8 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 // indirect
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
github.com/adrg/xdg v0.2.3 h1:GxXngdYxNDkoUvZXjNJGwqZxWXi43MKbOOlA/00qZi4=
github.com/adrg/xdg v0.2.3/go.mod h1:7I2hH/IT30IsupOpKZ5ue7/qNi3CoKzD6tL3HwpaRMQ=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.12.0 h1:mRhaKNwANqRgUBGKmnI5ZxEk7QXmjQeCcuYFMX2bfcc=
Expand All @@ -23,6 +25,7 @@ golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 h1:EZ2mChiOa8udjfp6rRmswTbtZN/QzUQp4ptM4rnjHvc=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
8 changes: 4 additions & 4 deletions helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package main

import (
"bytes"
"crypto/md5"
"fmt"
"hash/crc32"
"os"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -78,11 +78,11 @@ func check(err error) {
}
}

// hash returns the CRC-32 checksum (IEEE polynomial) of b.
func hash(b []byte) uint32 {
	return crc32.ChecksumIEEE(b)
}

// pathHash returns the hash of p prefixed with "path:". The prefix keeps the
// hash of a path distinct from the hash of a line with the same text.
func pathHash(p string) uint32 {
	return hash([]byte("path:" + p))
}

Expand Down
30 changes: 0 additions & 30 deletions import-magento.sh

This file was deleted.

Loading

0 comments on commit 5bf6450

Please sign in to comment.