Skip to content

Commit

Permalink
Rewrite and document git log parsing (#180)
Browse files Browse the repository at this point in the history
  • Loading branch information
alfsb authored Nov 25, 2024
1 parent ed660d4 commit df44b2d
Show file tree
Hide file tree
Showing 7 changed files with 245 additions and 54 deletions.
2 changes: 1 addition & 1 deletion scripts/translation/genrevdb.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ function generate( SQLite3 $db , string $lang )
}
catch ( Exception $e )
{
$db->exec( 'ROLLBACK TRANSACTION' );
consolelog( "Throw: " . $e->getMessage() );
$db->exec( 'ROLLBACK TRANSACTION' );
exit;
}
}
Expand Down
48 changes: 0 additions & 48 deletions scripts/translation/lib/GitDiffParser.php

This file was deleted.

136 changes: 136 additions & 0 deletions scripts/translation/lib/GitLogParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,140 @@ static function parseInto( string $lang , RevcheckFileList & $list )

pclose( $fp );
}

/**
 * Parses the output of `git log --name-only` for the repository in $gdir
 * and records, for every listed file that exists in $list, the commit
 * hash, the commit date and the skip flag of each commit touching it.
 *
 * Expected layout of each commit in the log output:
 *
 *   commit <hash>
 *   <Header>: <value>        (one or more, "Date:" among them)
 *   <empty line>
 *       <message lines>      (indented with spaces)
 *   <empty line>
 *   <file names>             (absent on merge commits and empty commits)
 *   <empty line>
 *
 * @param string           $gdir Path of the git working directory.
 * @param RevcheckFileList $list Files to be updated through addGitLogData().
 *
 * @throws Exception When the output does not start a commit where one is
 *                   expected, or when a non-blank line is found where an
 *                   empty separator line is expected.
 */
static function parseDir( string $gdir , RevcheckFileList $list )
{
    $gdir = escapeshellarg( $gdir );
    $proc = new GitLogParserProc( "git -C $gdir log --name-only" );

    $hash = "";
    $date = "";
    $skip = false;
    $lcnt = 0;

    while ( $proc->live )
    {
        // Hash

        if ( ! str_starts_with( $proc->line , "commit " ) )
            throw new Exception( "Expected commit hash." );

        // A new commit resets all per-commit state.
        $hash = trim( substr( $proc->line , 7 ) );
        $date = "";
        $skip = false;
        $lcnt = 0;
        $proc->next();

        // Headers

        while ( $proc->live && strlen( trim( $proc->line ) ) > 0 )
        {
            // Date
            if ( str_starts_with( $proc->line , 'Date:' ) )
            {
                $date = strtotime( trim( substr( $proc->line , 5 ) ) );
                $proc->next();
                continue;
            }
            // Other headers
            if ( $proc->line[0] !== ' ' && strpos( $proc->line , ':' ) > 0 )
            {
                $proc->next();
                continue;
            }
            break;
        }

        $proc->skip(); // Empty Line

        // Message

        while ( $proc->live && str_starts_with( $proc->line , ' ' ) )
        {
            if ( LOOSE_SKIP_REVCHECK ) // https://github.com/php/doc-base/pull/132
            {
                // Messages that contains [skip-revcheck] flags entire commit as ignored.
                if ( str_contains( $proc->line , '[skip-revcheck]' ) )
                    $skip = true;
            }
            else
            {
                // Messages that start with [skip-revcheck] flags entire commit as ignored.
                // Only the first message line is tested. The test must look at the
                // current log line, not at a value left over from header parsing.
                $lcnt++;
                if ( $lcnt == 1 && str_starts_with( trim( $proc->line ) , '[skip-revcheck]' ) )
                    $skip = true;
            }
            $proc->next();
        }

        $proc->skip(); // Empty Line

        // Merge commits and empty files commits

        // Merge commits are not followed with file listings.
        // Some normal commits also not have file listings
        // (see b73609198d4606621f57e165efc457f30e403217).

        if ( str_starts_with( $proc->line , "commit " ) )
            continue;

        // Files

        while ( $proc->live && strlen( trim( $proc->line ) ) > 0 )
        {
            // Files unknown to the list are ignored.
            $file = $list->get( trim( $proc->line ) );

            if ( $file != null )
                $file->addGitLogData( $hash , $date , $skip );

            $proc->next();
        }

        $proc->skip(); // Empty Line
    }
}
}

/**
 * Forward-only, line-by-line reader over the output of a shell command.
 *
 * After construction the first output line is already available in $line.
 * Each call to next() replaces $line with the following output line. When
 * the output is exhausted the process handle is closed, $live becomes
 * false and $line becomes an empty string.
 */
class GitLogParserProc
{
    /** True while there is a current line to be consumed. */
    public bool $live;

    /** Current output line, line terminator included; "" after the end. */
    public string $line;

    /** Process handle returned by popen(), or null once closed. */
    private $proc = null;

    /**
     * Starts the command and loads its first output line.
     *
     * @param string $command Shell command line; arguments must already
     *                        be escaped by the caller.
     *
     * @throws Exception When the process cannot be started.
     */
    public function __construct( string $command )
    {
        // Typed properties must never be read uninitialized.
        $this->line = "";
        $this->live = false;

        $handle = popen( $command , "r" );
        if ( $handle === false )
            throw new Exception( "Failed to run command: $command" );

        $this->proc = $handle;
        $this->live = true;
        $this->next();
    }

    /**
     * Advances to the next output line, or stops at end of output.
     * Does nothing once the reader is stopped.
     */
    public function next()
    {
        if ( $this->proc === null )
            return;

        $ret = fgets( $this->proc );
        if ( $ret === false )
            $this->stop();
        else
            $this->line = $ret;
    }

    /**
     * Consumes the current line, which is required to be blank.
     *
     * @throws Exception When the current line is not blank.
     */
    public function skip()
    {
        if ( trim( $this->line ) !== "" )
            throw new Exception( "Skipping non-blank line." );
        $this->next();
    }

    /**
     * Closes the process handle and marks the reader as finished.
     * Safe to call more than once.
     */
    public function stop()
    {
        if ( $this->proc !== null )
            pclose( $this->proc );

        $this->live = false;
        $this->line = "";
        $this->proc = null;
    }
}
103 changes: 103 additions & 0 deletions scripts/translation/lib/GitSlowUtils.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
<?php
# +----------------------------------------------------------------------+
# | Copyright (c) 1997-2024 The PHP Group |
# +----------------------------------------------------------------------+
# | This source file is subject to version 3.01 of the PHP license, |
# | that is bundled with this package in the file LICENSE, and is |
# | available through the world-wide-web at the following url: |
# | https://www.php.net/license/3_01.txt. |
# | If you did not receive a copy of the PHP license and are unable to |
# | obtain it through the world-wide-web, please send a note to |
# | license@php.net, so we can mail you a copy immediately. |
# +----------------------------------------------------------------------+
# | Authors: André L F S Bacci <ae php.net> |
# +----------------------------------------------------------------------+
# | Description: Common functions that interact with git command line. |
# +----------------------------------------------------------------------+

require_once __DIR__ . '/all.php';

/**
 * Helpers that shell out to the git command line, once per file.
 * Each call spawns git processes, so these are meant for the few files
 * that need the extra detail, not for whole-tree scans.
 */
class GitSlowUtils
{
    /**
     * Tells whether the changes made to a file since the revision recorded
     * in $file->hashRvtg consist of whitespace changes only.
     *
     * @param string           $gdir Path of the git working directory.
     * @param RevcheckDataFile $file File to test; reads hashRvtg, path and name.
     *
     * @return bool True when `git diff -b` reports nothing and the slow
     *              content comparison does not contradict it. When the file
     *              contents cannot be read, the fast path result is returned.
     */
    public static function checkDiffOnlyWsChange( string $gdir , RevcheckDataFile $file ) : bool
    {
        $hash = $file->hashRvtg;
        $flnm = $file->path == "" ? $file->name : $file->path . "/" . $file->name;

        // A <rev>:<path> object name is a single git argument, so it is
        // escaped as a whole, before the parts are escaped individually.
        $prevSpec = escapeshellarg( "$hash:$flnm" );
        $nextSpec = escapeshellarg( "HEAD:$flnm" );

        $gdir = escapeshellarg( $gdir );
        $flnm = escapeshellarg( $flnm );
        $hash = escapeshellarg( $hash );

        $func = '[' . __CLASS__ . ':' . __FUNCTION__ . ']';

        // Fast path

        // The git -b option is a bit misleading. It will ignore ws change
        // on existing ws runs, but will report insertion or removal of
        // ws runs. This suffices for detecting significant ws changes and
        // also ignoring insignificant ws changes in most cases we are
        // interested in.

        $output = `git -C $gdir diff -b $hash -- $flnm`;
        $onlyws = (string) $output === "";

        if ( ! $onlyws )
            return false;

        // Slow path

        $prev = (string) `git -C $gdir show $prevSpec`;
        $next = (string) `git -C $gdir show $nextSpec`;

        if ( $prev === "" || $next === "" )
        {
            fprintf( STDERR , "$func Failed to read file contents.\n" );
            return $onlyws;
        }

        $prev = self::discardPrefixSuffixEmptyWs( $prev );
        $next = self::discardPrefixSuffixEmptyWs( $next );

        // Strict comparison: loose comparison would treat distinct
        // numeric-looking strings as equal.
        if ( $prev !== $next )
        {
            // Not really an error, but a theory. Report this bug/issue
            // to start a discussion if this ws change must be ignored
            // or tracked.

            fprintf( STDERR , "$func Debug: Fast and slow path differ.\n" );
            return false;
        }

        return $onlyws;
    }

    /**
     * Normalizes text for comparison: trims leading and trailing whitespace
     * of every line and joins the lines with no separator, which also
     * drops lines that are empty or whitespace-only.
     *
     * @param string $text Text with "\n" separated lines.
     *
     * @return string The normalized, single-line text.
     */
    private static function discardPrefixSuffixEmptyWs( string $text ) : string
    {
        return implode( "" , array_map( 'trim' , explode( "\n" , $text ) ) );
    }

    /**
     * Fills $file->adds and $file->dels with the added and deleted line
     * counts reported by `git diff --numstat` between $file->hashRvtg and
     * the working tree. The properties are left untouched when git prints
     * nothing or the output has no numeric counts.
     *
     * @param string           $gdir Path of the git working directory.
     * @param RevcheckDataFile $file File to update; reads hashRvtg, path and name.
     */
    public static function parseAddsDels( string $gdir , RevcheckDataFile $file )
    {
        $hash = $file->hashRvtg;
        $name = $file->path == "" ? $file->name : $file->path . "/" . $file->name;

        $gdir = escapeshellarg( $gdir );
        $hash = escapeshellarg( $hash );
        $name = escapeshellarg( $name );

        $output = `git -C $gdir diff --numstat $hash -- $name`;
        if ( ! $output )
            return;

        // Output format: "<added><ws><deleted><ws><path>".
        if ( preg_match( '/(\d+)\s+(\d+)/' , $output , $matches ) === 1 )
        {
            $file->adds = $matches[1];
            $file->dels = $matches[2];
        }
    }
}
6 changes: 3 additions & 3 deletions scripts/translation/lib/RevcheckRun.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ function __construct( string $sourceDir , string $targetDir , bool $writeResults
$this->targetFiles = new RevcheckFileList( $targetDir );

// Source files get info from version control
GitLogParser::parseInto( $sourceDir , $this->sourceFiles );
GitLogParser::parseDir( $sourceDir , $this->sourceFiles );

// Target files get info from revtags
RevtagParser::parseInto( $targetDir , $this->targetFiles );
RevtagParser::parseDir( $targetDir , $this->targetFiles );

// match and mix
$this->parseTranslationXml();
Expand Down Expand Up @@ -197,7 +197,7 @@ private function addData( RevcheckFileItem $info , RevtagInfo|null $revtag = nul
case RevcheckStatus::TranslatedOld:
case RevcheckStatus::TranslatedWip:
$this->slowPathCount++;
GitDiffParser::parseAddsDels( $this->sourceDir , $file );
GitSlowUtils::parseAddsDels( $this->sourceDir , $file );
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion scripts/translation/lib/RevtagParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class RevtagInfo

class RevtagParser
{
static function parseInto( string $lang , RevcheckFileList & $list )
static function parseDir( string $lang , RevcheckFileList $list )
{
foreach( $list->iterator() as $entry )
$entry->revtag = RevtagParser::parseFile( $lang . '/' . $entry->file );
Expand Down
2 changes: 1 addition & 1 deletion scripts/translation/lib/all.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
require_once __DIR__ . '/backport.php';
require_once __DIR__ . '/CacheFile.php';
require_once __DIR__ . '/CacheUtil.php';
require_once __DIR__ . '/GitDiffParser.php';
require_once __DIR__ . '/GitLogParser.php';
require_once __DIR__ . '/GitSlowUtils.php';
require_once __DIR__ . '/OutputIgnoreArgv.php';
require_once __DIR__ . '/OutputIgnoreBuffer.php';
require_once __DIR__ . '/QaFileInfo.php';
Expand Down

0 comments on commit df44b2d

Please sign in to comment.