From ac5287b5977efffe3623e9dcfe9522cec4525190 Mon Sep 17 00:00:00 2001 From: Patrick Beuks Date: Tue, 23 Apr 2024 21:41:38 +0200 Subject: [PATCH 1/2] Detect binary file by NULL byte --- src/Gitonomy/Git/Blob.php | 13 ++++++++++++- tests/Gitonomy/Git/Tests/BlobTest.php | 17 +++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/Gitonomy/Git/Blob.php b/src/Gitonomy/Git/Blob.php index e455fe2..3043b04 100644 --- a/src/Gitonomy/Git/Blob.php +++ b/src/Gitonomy/Git/Blob.php @@ -19,6 +19,8 @@ */ class Blob { + private const FIRST_FEW_BYTES = 8000; + /** * @var Repository */ @@ -39,6 +41,11 @@ class Blob */ protected $mimetype; + /** + * @var bool + */ + protected $text; + /** * @param Repository $repository Repository where the blob is located * @param string $hash Hash of the blob @@ -103,6 +110,10 @@ public function isBinary() */ public function isText() { - return (bool) preg_match('#^text/|^application/xml#', $this->getMimetype()); + if (null === $this->text) { + $this->text = !str_contains(substr($this->getContent(), 0, self::FIRST_FEW_BYTES), chr(0)); + } + + return $this->text; } } diff --git a/tests/Gitonomy/Git/Tests/BlobTest.php b/tests/Gitonomy/Git/Tests/BlobTest.php index 4b28862..431a0a4 100644 --- a/tests/Gitonomy/Git/Tests/BlobTest.php +++ b/tests/Gitonomy/Git/Tests/BlobTest.php @@ -23,6 +23,11 @@ public function getReadmeBlob($repository) return $repository->getCommit(self::LONGFILE_COMMIT)->getTree()->resolvePath('README.md'); } + public function getImageBlob($repository) + { + return $repository->getCommit(self::LONGFILE_COMMIT)->getTree()->resolvePath('image.jpg'); + } + /** * @dataProvider provideFoobar */ @@ -67,8 +72,10 @@ public function testGetMimetype($repository) */ public function testIsText($repository) { - $blob = $this->getReadmeBlob($repository); - $this->assertTrue($blob->isText()); + $readmeBlob = $this->getReadmeBlob($repository); + $this->assertTrue($readmeBlob->isText()); + $imageBlob = $this->getImageBlob($repository); + $this->assertFalse($imageBlob->isText()); } /** @@ -76,7 +83,9 @@ public function testIsText($repository) */ public function testIsBinary($repository) { - $blob = $this->getReadmeBlob($repository); - $this->assertFalse($blob->isBinary()); + $readmeBlob = $this->getReadmeBlob($repository); + $this->assertFalse($readmeBlob->isBinary()); + $imageBlob = $this->getImageBlob($repository); + $this->assertTrue($imageBlob->isBinary()); } } From e35108e0364c5c063612cf632151b4b8bebad6b1 Mon Sep 17 00:00:00 2001 From: Patrick-Beuks Date: Wed, 24 Apr 2024 11:04:22 +0200 Subject: [PATCH 2/2] Add description where the binary check and values come from --- src/Gitonomy/Git/Blob.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Gitonomy/Git/Blob.php b/src/Gitonomy/Git/Blob.php index 3043b04..dfe885f 100644 --- a/src/Gitonomy/Git/Blob.php +++ b/src/Gitonomy/Git/Blob.php @@ -19,6 +19,9 @@ */ class Blob { + /** + * @var int Size that git uses to look for NULL byte: https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193 + */ private const FIRST_FEW_BYTES = 8000; /** @@ -96,6 +99,9 @@ public function getMimetype() /** * Determines if file is binary. * + * Uses the same check that git uses to determine if a file is binary or not + * https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193 + * * @return bool */ public function isBinary() @@ -106,6 +112,9 @@ public function isBinary() /** * Determines if file is text. * + * Uses the same check that git uses to determine if a file is binary or not + * https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193 + * * @return bool */ public function isText()