From 7ed7223115b75d79aa6be2ff425ef6d165d5091a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?=
Date: Mon, 18 Nov 2024 07:33:20 -0300
Subject: [PATCH] Rewrite git log parser (precise parsing)

---
 scripts/translation/genrevdb.php          |   2 +-
 scripts/translation/lib/GitDiffParser.php |  48 --------
 scripts/translation/lib/GitLogParser.php  | 148 ++++++++++++++++++++++
 scripts/translation/lib/RevcheckRun.php   |   6 +-
 scripts/translation/lib/RevtagParser.php  |   2 +-
 scripts/translation/lib/all.php           |   2 +-
 6 files changed, 154 insertions(+), 54 deletions(-)
 delete mode 100644 scripts/translation/lib/GitDiffParser.php

diff --git a/scripts/translation/genrevdb.php b/scripts/translation/genrevdb.php
index aa87650a6..b2e4a3e4b 100644
--- a/scripts/translation/genrevdb.php
+++ b/scripts/translation/genrevdb.php
@@ -112,8 +112,8 @@ function generate( SQLite3 $db , string $lang )
     }
     catch ( Exception $e )
     {
-        $db->exec( 'ROLLBACK TRANSACTION' );
         consolelog( "Throw: " . $e->getMessage() );
+        $db->exec( 'ROLLBACK TRANSACTION' );
         exit;
     }
 }
diff --git a/scripts/translation/lib/GitDiffParser.php b/scripts/translation/lib/GitDiffParser.php
deleted file mode 100644
index 169c13c16..000000000
--- a/scripts/translation/lib/GitDiffParser.php
+++ /dev/null
@@ -1,48 +0,0 @@
- |
- * +----------------------------------------------------------------------+
- * | Description: Parse `git diff` to complement file state.              |
- * +----------------------------------------------------------------------+
- */
-
-require_once __DIR__ . '/all.php';
-
-class GitDiffParser
-{
-    public static function parseAddsDels( string $chdir , RevcheckDataFile $file )
-    {
-        $cwd = getcwd();
-        chdir( $chdir );
-
-        $hash = $file->hashRvtg;
-        $name = $file->path == "" ? $file->name : $file->path . "/" . $file->name;
-
-        $hash = escapeshellarg( $hash );
-        $name = escapeshellarg( $name );
-
-        $output = `git diff --numstat $hash -- $name`;
-        if ( $output )
-        {
-            preg_match( '/(\d+)\s+(\d+)/' , $output , $matches );
-            if ( $matches )
-            {
-                $file->adds = $matches[1];
-                $file->dels = $matches[2];
-            }
-        }
-
-        chdir( $cwd );
-    }
-}
diff --git a/scripts/translation/lib/GitLogParser.php b/scripts/translation/lib/GitLogParser.php
index 8c3d841c9..0ad45b8ac 100644
--- a/scripts/translation/lib/GitLogParser.php
+++ b/scripts/translation/lib/GitLogParser.php
@@ -89,4 +89,152 @@ static function parseInto( string $lang , RevcheckFileList & $list )
 
         pclose( $fp );
     }
+
+    /**
+     * Parses the output of `git log --name-only` run on $gdir. For every
+     * listed path that $list->get() resolves, calls addGitLogData() with
+     * the commit hash, the commit date parsed by strtotime() and the
+     * skip flag derived from [skip-revcheck] in the commit message.
+     *
+     * @throws Exception If the output does not follow the expected layout.
+     */
+    static function parseDir( string $gdir , RevcheckFileList $list )
+    {
+        $gdir = escapeshellarg( $gdir );
+        $proc = new GitLogParserProc( "git -C $gdir log --name-only" );
+
+        $hash = "";
+        $date = "";
+        $skip = false;
+        $lcnt = 0;
+
+        while ( $proc->live )
+        {
+            // Hash
+
+            if ( str_starts_with( $proc->line , "commit " ) )
+            {
+                $hash = trim( substr( $proc->line , 7 ) );
+                $date = "";
+                $skip = false;
+                $lcnt = 0;
+                $proc->next();
+            }
+            else
+                throw new Exception( "Expected commit hash." );
+
+            // Headers
+
+            while ( $proc->live && strlen( trim( $proc->line ) ) > 0 )
+            {
+                // Date
+                if ( str_starts_with( $proc->line , 'Date:' ) )
+                {
+                    $line = trim( substr( $proc->line , 5 ) );
+                    $date = strtotime( $line );
+                    $proc->next();
+                    continue;
+                }
+                // Other headers
+                if ( $proc->line[0] != ' ' && strpos( $proc->line , ':' ) > 0 )
+                {
+                    $proc->next();
+                    continue;
+                }
+                break;
+            }
+
+            $proc->skip(); // Empty Line
+
+            // Message
+
+            while ( $proc->live && str_starts_with( $proc->line , ' ' ) )
+            {
+                if ( LOOSE_SKIP_REVCHECK ) // https://github.com/php/doc-base/pull/132
+                {
+                    // A message that contains [skip-revcheck] flags the entire commit as ignored.
+                    if ( str_contains( $proc->line , '[skip-revcheck]' ) )
+                        $skip = true;
+                }
+                else
+                {
+                    // A message whose first line starts with [skip-revcheck] flags the entire commit as ignored.
+                    $lcnt++;
+                    if ( $lcnt == 1 && str_starts_with( trim( $proc->line ) , '[skip-revcheck]' ) )
+                        $skip = true;
+                }
+                $proc->next();
+            }
+
+            $proc->skip(); // Empty Line
+
+            // Merge commits and empty files commits
+
+            // Merge commits are not followed by file listings.
+            // Some normal commits also have no file listings
+            // (see b73609198d4606621f57e165efc457f30e403217).
+
+            if ( str_starts_with( $proc->line , "commit " ) )
+                continue;
+
+            // Files
+
+            while ( $proc->live && strlen( trim( $proc->line ) ) > 0 )
+            {
+                $file = $list->get( trim( $proc->line ) );
+
+                if ( $file != null )
+                    $file->addGitLogData( $hash , $date , $skip );
+
+                $proc->next();
+            }
+
+            $proc->skip(); // Empty Line
+        }
+    }
 }
+
+/**
+ * Reads the output of a shell command one line at a time. $line holds
+ * the current line; $live becomes false when the output is exhausted.
+ */
+class GitLogParserProc
+{
+    public bool $live;
+    public string $line;
+    private $proc = null;
+
+    function __construct( string $command )
+    {
+        $this->proc = popen( $command , "r" );
+        $this->live = true;
+        $this->next();
+    }
+
+    function next()
+    {
+        if ( $this->proc == null )
+            return;
+
+        $ret = fgets( $this->proc );
+        if ( $ret === false )
+            $this->stop();
+        else
+            $this->line = $ret;
+    }
+
+    function skip()
+    {
+        if ( trim( $this->line ) != "" )
+            throw new Exception( "Skipping non-blank line." );
+        $this->next();
+    }
+
+    function stop()
+    {
+        pclose( $this->proc );
+        $this->live = false;
+        $this->line = "";
+        $this->proc = null;
+    }
+}
\ No newline at end of file
diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php
index b39d08b8a..59e412be4 100644
--- a/scripts/translation/lib/RevcheckRun.php
+++ b/scripts/translation/lib/RevcheckRun.php
@@ -48,10 +48,10 @@ function __construct( string $sourceDir , string $targetDir , bool $writeResults
         $this->targetFiles = new RevcheckFileList( $targetDir );
 
         // Source files get info from version control
-        GitLogParser::parseInto( $sourceDir , $this->sourceFiles );
+        GitLogParser::parseDir( $sourceDir , $this->sourceFiles );
 
         // Target files get info from revtags
-        RevtagParser::parseInto( $targetDir , $this->targetFiles );
+        RevtagParser::parseDir( $targetDir , $this->targetFiles );
 
         // match and mix
         $this->parseTranslationXml();
@@ -197,7 +197,7 @@ private function addData( RevcheckFileItem $info , RevtagInfo|null $revtag = nul
             case RevcheckStatus::TranslatedOld:
             case RevcheckStatus::TranslatedWip:
                 $this->slowPathCount++;
-                GitDiffParser::parseAddsDels( $this->sourceDir , $file );
+                GitSlowUtils::parseAddsDels( $this->sourceDir , $file );
         }
     }
 }
diff --git a/scripts/translation/lib/RevtagParser.php b/scripts/translation/lib/RevtagParser.php
index cae5eada0..61397fa52 100644
--- a/scripts/translation/lib/RevtagParser.php
+++ b/scripts/translation/lib/RevtagParser.php
@@ -30,7 +30,7 @@ class RevtagInfo
 
 class RevtagParser
 {
-    static function parseInto( string $lang , RevcheckFileList & $list )
+    static function parseDir( string $lang , RevcheckFileList $list )
     {
         foreach( $list->iterator() as $entry )
             $entry->revtag = RevtagParser::parseFile( $lang . '/' . $entry->file );
diff --git a/scripts/translation/lib/all.php b/scripts/translation/lib/all.php
index 85b251f97..843c046bb 100644
--- a/scripts/translation/lib/all.php
+++ b/scripts/translation/lib/all.php
@@ -24,8 +24,8 @@ require_once __DIR__ . '/backport.php';
 
 require_once __DIR__ . '/CacheFile.php';
 require_once __DIR__ . '/CacheUtil.php';
-require_once __DIR__ . '/GitDiffParser.php';
 require_once __DIR__ . '/GitLogParser.php';
+require_once __DIR__ . '/GitSlowUtils.php';
 require_once __DIR__ . '/OutputIgnoreArgv.php';
 require_once __DIR__ . '/OutputIgnoreBuffer.php';
 require_once __DIR__ . '/QaFileInfo.php';