propresenter-php/tests/RtfExtractorTest.php
Thorsten Bus 22ba4aff7d refactor: make repo Composer-compatible by moving php/ to root and ref/ to doc/reference_samples
- Move src/, tests/, bin/, generated/, proto/, composer.json, composer.lock, phpunit.xml from php/ to repo root
- Move ref/ to doc/reference_samples/ for better organization
- Remove vendor/ from git tracking (now properly gitignored)
- Update all test file paths (dirname adjustments and ref/ -> doc/reference_samples/)
- Update all documentation paths (AGENTS.md, doc/*.md)
- Remove php.bak/ directory
- All 252 tests pass
2026-03-30 13:26:29 +02:00

190 lines
7.2 KiB
PHP

<?php
namespace ProPresenter\Parser\Tests;
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework\Attributes\Test;
use ProPresenter\Parser\RtfExtractor;
/**
 * Tests for RtfExtractor::toPlainText().
 *
 * Each case feeds an RTF document (or a non-RTF string) to the extractor and
 * compares the returned plain text with an exact expected value. Fixtures that
 * need RTF \' hex escapes are written as nowdoc literals; all others are
 * assembled line by line via joinLines().
 */
class RtfExtractorTest extends TestCase
{
    /**
     * Glue fixture lines together with a single "\n" between them.
     *
     * An empty string argument therefore produces an empty line in the
     * resulting document.
     */
    private static function joinLines(string ...$lines): string
    {
        return implode("\n", $lines);
    }

    // ─── Real ProPresenter RTF from Test.pro ───

    /**
     * A backslash at the end of an RTF line is expected to come out as a
     * newline between the two text fragments.
     */
    #[Test]
    public function extractsMultilineTextFromRealProPresenterRtf(): void
    {
        $document = self::joinLines(
            '{\rtf1\ansi\ansicpg1252\cocoartf2761',
            '\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 HelveticaNeue;}',
            '{\colortbl;\red255\green255\blue255;\red255\green255\blue255;}',
            '{\*\expandedcolortbl;;\csgray\c100000;}',
            '\deftab1680',
            '\pard\pardeftab1680\pardirnatural\qc\partightenfactor0',
            '',
            '\f0\fs84 \cf2 \CocoaLigature0 Vers1.1\\',
            'Vers1.2}',
        );

        self::assertSame("Vers1.1\nVers1.2", RtfExtractor::toPlainText($document));
    }

    /**
     * Same document structure as the previous case with different text.
     *
     * NOTE(review): despite the method name, this fixture contains a line
     * break and the expected value has two lines - confirm whether a genuine
     * single-line fixture was intended.
     */
    #[Test]
    public function extractsSingleLineText(): void
    {
        $document = self::joinLines(
            '{\rtf1\ansi\ansicpg1252\cocoartf2761',
            '\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 HelveticaNeue;}',
            '{\colortbl;\red255\green255\blue255;\red255\green255\blue255;}',
            '{\*\expandedcolortbl;;\csgray\c100000;}',
            '\deftab1680',
            '\pard\pardeftab1680\pardirnatural\qc\partightenfactor0',
            '',
            '\f0\fs84 \cf2 \CocoaLigature0 Chorus1\\',
            'Chorus2}',
        );

        self::assertSame("Chorus1\nChorus2", RtfExtractor::toPlainText($document));
    }

    // ─── German characters ───

    /**
     * The \'e4 hex escape is expected to be decoded to "ä".
     */
    #[Test]
    public function extractsGermanCharactersFromRtf(): void
    {
        // Real pattern from "An einem Kreuz hängt Gottes Sohn.pro".
        // A nowdoc keeps every backslash literal, which the RTF \' hex escapes
        // require; the indentation in front of the closing marker is stripped
        // from each line by PHP.
        $document = <<<'RTF'
            {\rtf1\ansi\ansicpg1252\cocoartf2761
            \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 AvenirNext-Regular;}
            {\colortbl;\red255\green255\blue255;\red255\green255\blue255;\red30\green30\blue30;}
            {\*\expandedcolortbl;;\csgenericrgb\c100000\c100000\c100000;\csgenericrgb\c11818\c11807\c11716;}
            \pard\slleading-40\pardirnatural\qc\partightenfactor0
            \f0\fs120 \cf2 \outl0\strokewidth-40 \strokec3 ist alles, was uns qu\'e4lt, vorbei,\
            denn er, der starb, macht alles neu.}
            RTF;

        self::assertSame(
            "ist alles, was uns quält, vorbei,\ndenn er, der starb, macht alles neu.",
            RtfExtractor::toPlainText($document),
        );
    }

    /**
     * Covers the hex escapes fc, df, f6, e4, e9 and e8, expected to decode to
     * ü, ß, ö, ä, é and è respectively.
     */
    #[Test]
    public function extractsAllGermanSpecialCharacters(): void
    {
        // Nowdoc for the same reason as above: \' must stay a literal backslash-quote.
        $document = <<<'RTF'
            {\rtf1\ansi\ansicpg1252\cocoartf2761
            \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 HelveticaNeue;}
            {\colortbl;\red255\green255\blue255;\red255\green255\blue255;}
            {\*\expandedcolortbl;;\csgray\c100000;}
            \pard\pardirnatural\qc\partightenfactor0
            \f0\fs84 \cf2 \CocoaLigature0 Gr\'fc\'dfe \'f6ffnen \'e4ndern \'e9l\'e8ve}
            RTF;

        self::assertSame('Grüße öffnen ändern élève', RtfExtractor::toPlainText($document));
    }

    // ─── Edge cases ───

    /**
     * An empty input string is expected to yield an empty string.
     */
    #[Test]
    public function emptyStringReturnsEmpty(): void
    {
        $plain = RtfExtractor::toPlainText('');

        self::assertSame('', $plain);
    }

    /**
     * Input consisting of a single space is expected to yield an empty string.
     *
     * NOTE(review): "nullish" in the name is loose - the input is
     * whitespace, not null.
     */
    #[Test]
    public function nullishRtfReturnsEmpty(): void
    {
        $plain = RtfExtractor::toPlainText(' ');

        self::assertSame('', $plain);
    }

    /**
     * A document that carries formatting commands but no text content is
     * expected to yield an empty string.
     */
    #[Test]
    public function rtfWithOnlyFormattingReturnsEmpty(): void
    {
        $document = self::joinLines(
            '{\rtf1\ansi\ansicpg1252\cocoartf2761',
            '\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 HelveticaNeue;}',
            '{\colortbl;\red255\green255\blue255;\red255\green255\blue255;}',
            '{\*\expandedcolortbl;;\csgray\c100000;}',
            '\deftab1680',
            '\pard\pardeftab1680\pardirnatural\qc\partightenfactor0',
            '',
            '\f0\fs84 \cf2 \CocoaLigature0 }',
        );

        self::assertSame('', RtfExtractor::toPlainText($document));
    }

    // ─── Translation text box (different font size, same structure) ───

    /**
     * Translation fixture: \fs80 and an \cssrgb expanded colour table instead
     * of the \fs84 / \csgray values used by the lyric fixtures.
     */
    #[Test]
    public function extractsTranslationText(): void
    {
        // Real translation RTF from Test.pro
        $document = self::joinLines(
            '{\rtf1\ansi\ansicpg1252\cocoartf2761',
            '\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 HelveticaNeue;}',
            '{\colortbl;\red255\green255\blue255;\red255\green255\blue255;}',
            '{\*\expandedcolortbl;;\cssrgb\c100000\c100000\c100000;}',
            '\deftab1680',
            '\pard\pardeftab1680\pardirnatural\qc\partightenfactor0',
            '',
            '\f0\fs80 \cf2 \CocoaLigature0 Translated 1\\',
            'Translated 2}',
        );

        self::assertSame("Translated 1\nTranslated 2", RtfExtractor::toPlainText($document));
    }

    // ─── Unicode escapes ───

    /**
     * \uN? escapes: N is a decimal code point and the trailing "?" is the
     * ANSI fallback character, which must not appear in the output.
     */
    #[Test]
    public function handlesUnicodeEscapes(): void
    {
        $document = self::joinLines(
            '{\rtf1\ansi\ansicpg1252\cocoartf2761',
            '\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 HelveticaNeue;}',
            '{\colortbl;\red255\green255\blue255;\red255\green255\blue255;}',
            '{\*\expandedcolortbl;;\csgray\c100000;}',
            '\pard\pardirnatural\qc\partightenfactor0',
            '',
            '\f0\fs84 \cf2 \CocoaLigature0 Praise \u9899? Him}',
        );

        // \u9899 is code point 9899 (⚫); the "?" fallback is dropped.
        self::assertSame('Praise ⚫ Him', RtfExtractor::toPlainText($document));
    }

    // ─── Stroke/outline formatting (real pattern) ───

    /**
     * The additional \outl0\strokewidth-40 \strokec3 controls must not leak
     * into the extracted text.
     */
    #[Test]
    public function extractsTextWithStrokeFormatting(): void
    {
        // Real pattern from all-songs: extra \outl0\strokewidth-40 \strokec3
        $document = self::joinLines(
            '{\rtf1\ansi\ansicpg1252\cocoartf2761',
            '\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 AvenirNext-Regular;}',
            '{\colortbl;\red255\green255\blue255;\red255\green255\blue255;\red30\green30\blue30;}',
            '{\*\expandedcolortbl;;\csgenericrgb\c100000\c100000\c100000;\csgenericrgb\c11818\c11807\c11716;}',
            '\pard\slleading-40\pardirnatural\qc\partightenfactor0',
            '',
            '\f0\fs120 \cf2 \outl0\strokewidth-40 \strokec3 Hello World}',
        );

        self::assertSame('Hello World', RtfExtractor::toPlainText($document));
    }

    // ─── Non-RTF input passes through ───

    /**
     * A string that is not RTF is expected to be returned unchanged.
     */
    #[Test]
    public function nonRtfStringReturnedAsIs(): void
    {
        $plain = RtfExtractor::toPlainText('Just plain text');

        self::assertSame('Just plain text', $plain);
    }
}