* @license https://www.gnu.org/licenses/agpl-3.0.en.html/ GNU Affero General Public License v3.0
 */

namespace App\Helpers;

use App\Models\WhitelistedImageUrl;

/**
 * Converts user-supplied BBCode into HTML.
 *
 * The input is HTML-escaped first, then self-contained tags (list items, rules,
 * links, images, videos, comparisons) are replaced with regular expressions, and
 * finally paired tags are converted with a stack-based scanner driven by `$parsers`.
 *
 * NOTE(review): many HTML literals in this class are empty or whitespace-only, and
 * the `[img]`, `[youtube]` and `[video]` callbacks return an empty string. That looks
 * like markup was lost from the literals — confirm against version control. The
 * values below are kept exactly as found (whitespace written as escape sequences).
 */
class Bbcode
{
    /**
     * Paired-tag definitions.
     *
     * `openBbcode` is a regex anchored at the start of the remaining text, `closeBbcode`
     * is the literal closing tag (compared case-insensitively), `openHtml`/`closeHtml`
     * are the replacements (`openHtml` may reference capture groups such as `$1`), and
     * `block` enables line-break trimming around the tag. Entries are tried in
     * declaration order and the first matching `openBbcode` wins.
     *
     * @var array<
     *     string,
     *     array{
     *         openBbcode: string,
     *         closeBbcode: string,
     *         openHtml: string,
     *         closeHtml: string,
     *         block: bool
     *     }
     * > $parsers
     */
    private array $parsers = [
        'h1' => [
            'openBbcode' => '/^\[h1\]/i',
            'closeBbcode' => '[/h1]',
            'openHtml' => "\n\n",
            'closeHtml' => "\n\n",
            'block' => true,
        ],
        'h2' => [
            'openBbcode' => '/^\[h2\]/i',
            'closeBbcode' => '[/h2]',
            'openHtml' => "\n\n",
            'closeHtml' => "\n\n",
            'block' => true,
        ],
        'h3' => [
            'openBbcode' => '/^\[h3\]/i',
            'closeBbcode' => '[/h3]',
            'openHtml' => "\n\n",
            'closeHtml' => "\n\n",
            'block' => true,
        ],
        'h4' => [
            'openBbcode' => '/^\[h4\]/i',
            'closeBbcode' => '[/h4]',
            'openHtml' => "\n\n",
            'closeHtml' => "\n\n",
            'block' => true,
        ],
        'h5' => [
            'openBbcode' => '/^\[h5\]/i',
            'closeBbcode' => '[/h5]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'h6' => [
            'openBbcode' => '/^\[h6\]/i',
            'closeBbcode' => '[/h6]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'bold' => [
            'openBbcode' => '/^\[b\]/i',
            'closeBbcode' => '[/b]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'italic' => [
            'openBbcode' => '/^\[i\]/i',
            'closeBbcode' => '[/i]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'underline' => [
            'openBbcode' => '/^\[u\]/i',
            'closeBbcode' => '[/u]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'linethrough' => [
            'openBbcode' => '/^\[s\]/i',
            'closeBbcode' => '[/s]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'size' => [
            'openBbcode' => '/^\[size=(\d+)\]/i',
            'closeBbcode' => '[/size]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'font' => [
            'openBbcode' => '/^\[font=([a-z0-9 ]+)\]/i',
            'closeBbcode' => '[/font]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'color' => [
            'openBbcode' => '/^\[color=(\#[a-f0-9]{3,4}|\#[a-f0-9]{6}|\#[a-f0-9]{8}|[a-z]+)\]/i',
            'closeBbcode' => '[/color]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'center' => [
            'openBbcode' => '/^\[center\]/i',
            'closeBbcode' => '[/center]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'left' => [
            'openBbcode' => '/^\[left\]/i',
            'closeBbcode' => '[/left]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'right' => [
            'openBbcode' => '/^\[right\]/i',
            'closeBbcode' => '[/right]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'quote' => [
            'openBbcode' => '/^\[quote\]/i',
            'closeBbcode' => '[/quote]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'namedquote' => [
            'openBbcode' => '/^\[quote=(.*?)\]/i',
            'closeBbcode' => '[/quote]',
            'openHtml' => "\nQuoting \$1:\n\n",
            'closeHtml' => "\n\n",
            'block' => true,
        ],
        'orderedlistnumerical' => [
            'openBbcode' => '/^\[list=1\]/i',
            'closeBbcode' => '[/list]',
            'openHtml' => "\n    ",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'orderedlistalpha' => [
            'openBbcode' => '/^\[list=a\]/i',
            'closeBbcode' => '[/list]',
            'openHtml' => "\n    ",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'unorderedlist' => [
            'openBbcode' => '/^\[list\]/i',
            'closeBbcode' => '[/list]',
            'openHtml' => '',
            // This key was missing, which violated the declared shape and raised an
            // "Undefined array key" warning whenever a `[list]` was closed.
            // NOTE(review): the intended markup is not recoverable from this file — confirm.
            'closeHtml' => '',
            'block' => true,
        ],
        'code' => [
            'openBbcode' => '/^\[code\]/i',
            'closeBbcode' => '[/code]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'pre' => [
            'openBbcode' => '/^\[pre\]/i',
            'closeBbcode' => '[/pre]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'alert' => [
            'openBbcode' => '/^\[alert\]/i',
            'closeBbcode' => '[/alert]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'note' => [
            'openBbcode' => '/^\[note\]/i',
            'closeBbcode' => '[/note]',
            'openHtml' => "\n",
            'closeHtml' => "\n",
            'block' => true,
        ],
        'sub' => [
            'openBbcode' => '/^\[sub\]/i',
            'closeBbcode' => '[/sub]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'sup' => [
            'openBbcode' => '/^\[sup\]/i',
            'closeBbcode' => '[/sup]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'small' => [
            'openBbcode' => '/^\[small\]/i',
            'closeBbcode' => '[/small]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => false,
        ],
        'table' => [
            'openBbcode' => '/^\[table\]/i',
            'closeBbcode' => '[/table]',
            'openHtml' => '',
            'closeHtml' => "\n",
            'block' => true,
        ],
        'table-row' => [
            'openBbcode' => '/^\[tr\]/i',
            'closeBbcode' => '[/tr]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => true,
        ],
        'table-header' => [
            'openBbcode' => '/^\[th\]/i',
            'closeBbcode' => '[/th]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => true,
        ],
        'table-data' => [
            'openBbcode' => '/^\[td\]/i',
            'closeBbcode' => '[/td]',
            'openHtml' => '',
            'closeHtml' => '',
            'block' => true,
        ],
        'spoiler' => [
            'openBbcode' => '/^\[spoiler\]/i',
            'closeBbcode' => '[/spoiler]',
            'openHtml' => "\nSpoiler\n",
            'closeHtml' => "\n",
            'block' => false,
        ],
        'named-spoiler' => [
            'openBbcode' => '/^\[spoiler=(.*?)\]/i',
            'closeBbcode' => '[/spoiler]',
            'openHtml' => "\n\$1\n",
            'closeHtml' => "\n",
            'block' => false,
        ],
    ];

    /**
     * Parses the BBCode string.
     *
     * @param ?string $source            Raw BBCode; `null` is treated as an empty string.
     * @param bool    $replaceLineBreaks When true, line breaks directly around block tags are
     *                                   trimmed and the remaining line breaks are replaced.
     *
     * @return string The converted text. Always a string, even if a regex pass fails.
     */
    public function parse(?string $source, bool $replaceLineBreaks = true): string
    {
        // Escape first, so every later pattern operates on HTML-escaped text.
        $source = htmlspecialchars($source ?? '', ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8');
        $source = $this->replaceSelfContainedTags($source);

        // Stack of unclosed elements
        $openedElements = [];

        // Character index
        $index = 0;

        // Don't loop more than the length of the source
        while ($index < \strlen($source)) {
            // Get the next occurrence of `[`
            $next = strpos($source, '[', $index);

            // Break if there are no more occurrences of `[`
            if ($next === false) {
                break;
            }

            $index = $next;

            // Break if `[` is the last character of the source
            if ($index + 1 >= \strlen($source)) {
                break;
            }

            // Is the potential tag opening or closing?
            if ($source[$index + 1] === '/' && $openedElements !== []) {
                $name = array_pop($openedElements);
                $el = $this->parsers[$name];
                $closeBbcode = $el['closeBbcode'];
                $closeHtml = $el['closeHtml'];
                $tag = substr($source, $index, \strlen($closeBbcode));

                // Replace bbcode tag with html tag if found tag matches expected tag,
                // otherwise return the expected element to the stack
                if (strcasecmp($tag, $closeBbcode) === 0) {
                    $source = substr_replace($source, $closeHtml, $index, \strlen($closeBbcode));

                    if ($replaceLineBreaks && $el['block'] === true) {
                        $this->handleBlockElementSpacing(
                            $source,
                            $index,
                            $index,
                            $index + \strlen($closeHtml) - 1
                        );
                    }
                } else {
                    $openedElements[] = $name;
                }
            } else {
                $remainingText = substr($source, $index);

                // Find match between found bbcode tag and valid elements
                foreach ($this->parsers as $name => $el) {
                    // The opening bbcode tag uses the regex `^` character to make
                    // sure only the beginning of $remainingText is matched
                    if (preg_match($el['openBbcode'], $remainingText, $matches) !== 1) {
                        continue;
                    }

                    $replacement = (string) preg_replace($el['openBbcode'], $el['openHtml'], $matches[0]);
                    $source = substr_replace($source, $replacement, $index, \strlen($matches[0]));

                    if ($replaceLineBreaks && $el['block'] === true) {
                        $this->handleBlockElementSpacing(
                            $source,
                            $index,
                            $index,
                            $index + \strlen($replacement) - 1
                        );
                    }

                    $openedElements[] = $name;

                    break;
                }
            }

            $index++;
        }

        // Close whatever the author left open, innermost element first.
        while ($openedElements !== []) {
            $source .= $this->parsers[array_pop($openedElements)]['closeHtml'];
        }

        if ($replaceLineBreaks) {
            // Replace line breaks
            $source = str_replace(["\r\n", "\n"], "\n    ", $source);
        }

        return $source;
    }

    /**
     * Replaces tags that are handled without the open/close stack: `[*]`, `[hr]`,
     * links, images, YouTube embeds and comparisons.
     *
     * A failing regex pass yields an empty string instead of `null`, so the result is
     * always a string.
     *
     * @param string $source HTML-escaped BBCode.
     */
    private function replaceSelfContainedTags(string $source): string
    {
        // Replace all void elements since they don't have closing tags
        $source = str_replace('[*]', "\n  • ", $source);
        $source = str_replace('[hr]', "\n    ", $source);

        // Applied in declaration order; each pass sees the output of the previous one.
        $replacements = [
            '/\[url](.*?)\[\/url]/i' => fn (array $matches): string => ''.$this->sanitizeUrl($matches[1]).'',
            '/\[url=(.*?)](.*?)\[\/url]/i' => fn (array $matches): string => ''.$matches[2].'',
            '/\[img](.*?)\[\/img]/i' => fn (array $matches): string => '',
            '/\[img width=(\d+)](.*?)\[\/img]/i' => fn (array $matches): string => '',
            '/\[img=(\d+)(?:x\d+)?](.*?)\[\/img]/i' => fn (array $matches): string => '',
            // YouTube video elements need to be replaced like this because the content inside the two tags
            // has to be moved into an HTML attribute
            '/\[youtube]([a-z0-9_-]{11})\[\/youtube]/i' => static fn (array $matches): string => '',
            '/\[video]([a-z0-9_-]{11})\[\/video]/i' => static fn (array $matches): string => '',
            // The source has already been HTML-escaped, so the quotes arrive as `&quot;`.
            // Matching only a literal `"` (as before) could never succeed.
            '/\[video=(?:"|&quot;)youtube(?:"|&quot;)]([a-z0-9_-]{11})\[\/video]/i' => static fn (array $matches): string => '',
            // Common comparison syntax used in other torrent management systems is quite specific,
            // so it must be done here instead
            '/\[comparison=(.*?)]\s*(.*?)\s*\[\/comparison]/is' => fn (array $matches): string => $this->renderComparison($matches),
        ];

        foreach ($replacements as $pattern => $callback) {
            $source = preg_replace_callback($pattern, $callback, $source) ?? '';
        }

        return $source;
    }

    /**
     * Renders one `[comparison=a, b]url url ...[/comparison]` match.
     *
     * @param array<int, string> $matches Group 1 is the comma-separated list of compared
     *                                    names, group 2 the comma/whitespace-separated URLs.
     *
     * @return string The rendered `partials.comparison` view with whitespace runs collapsed
     *                to single spaces, or a short error text when splitting/collapsing fails.
     */
    private function renderComparison(array $matches): string
    {
        $comparates = preg_split('/\s*,\s*/', $matches[1]);
        $urls = preg_split('/\s*(?:,|\s)\s*/', $matches[2]);

        if ($comparates === false || $urls === false) {
            return 'Broken comparison';
        }

        $validatedUrls = collect($urls)->map(fn ($url) => $this->sanitizeUrl($url, isImage: true));

        // One chunk per row: each row holds one URL per compared name.
        $chunkedUrls = $validatedUrls->chunk(\count($comparates));

        $html = view('partials.comparison', ['comparates' => $comparates, 'urls' => $chunkedUrls])->render();
        $html = preg_replace('/\s+/', ' ', $html);

        if (!\is_string($html)) {
            return 'Broken html';
        }

        return $html;
    }

    /**
     * Remove line breaks immediately before and after tags for block elements.
     *
     * Useful so that you can write bbcode like the following without worrying about spacing:
     *
     * ```
     * [list]
     * [*]item 1
     * [*]item 2
     * [/list]
     * ```
     *
     * @param string $source        Reference to the source text content currently being converted from bbcode to html.
     * @param int    $index         Reference to the current index of `$source` that the parser must keep track of.
     * @param int    $tagStartIndex The index of the first character of the tag being parsed inside `$source`.
     * @param int    $tagStopIndex  The index of the last character of the tag being parsed inside `$source`.
     *                              Equals `$tagStartIndex - 1` when the tag was replaced by an empty string.
     */
    private function handleBlockElementSpacing(string &$source, int &$index, int $tagStartIndex, int $tagStopIndex): void
    {
        // Remove two line breaks (if they exist) instead of one, since a
        // line break after a block element is positioned on the line after
        // the block element, while a line break after a non-block element
        // is positioned at the end of the same line of the non-block element.
        // I.e. two line breaks after a block element provides the same amount
        // of vertical space as one line break after a non-block element.
        for ($i = 0; $i < 2; $i++) {
            $bbcodeStopIndex = \strlen($source) - 1;

            // Does there exist 2 characters after the tag and are they \r\n?
            // Otherwise, does there exist 1 character after the tag and is it \n?
            // In either case, remove those characters.
            if ($tagStopIndex + 2 <= $bbcodeStopIndex && substr_compare($source, "\r\n", $tagStopIndex + 1, 2) === 0) {
                $source = substr_replace($source, '', $tagStopIndex + 1, 2);
            } elseif ($tagStopIndex + 1 <= $bbcodeStopIndex && $source[$tagStopIndex + 1] === "\n") {
                $source = substr_replace($source, '', $tagStopIndex + 1, 1);
            }
        }

        // Does there exist 2 characters before the tag and are they \r\n?
        // Otherwise, does there exist 1 character before the tag and is it \n?
        // In either case, remove those characters and adjust the current index appropriately.
        if ($tagStartIndex >= 2 && substr_compare($source, "\r\n", $tagStartIndex - 2, 2) === 0) {
            $source = substr_replace($source, '', $tagStartIndex - 2, 2);
            $index -= 2;
        } elseif ($tagStartIndex >= 1 && $source[$tagStartIndex - 1] === "\n") {
            $source = substr_replace($source, '', $tagStartIndex - 1, 1);
            $index -= 1;
        }
    }

    /**
     * Normalises a user-supplied URL and rejects anything that does not validate.
     *
     * Root-relative URLs are prefixed with `config('app.url')`. URLs whose scheme is not
     * whitelisted (or that have no scheme) get `https://` prepended. When `$isImage` is
     * truthy and the URL matches none of the cached whitelist patterns, the URL is routed
     * through the wsrv.nl image proxy.
     *
     * @param string $url     The URL as written by the user (already HTML-escaped by `parse()`).
     * @param ?bool  $isImage Whether the URL is used as an image source.
     *
     * @return string The usable URL, or the text `Broken link` when validation fails.
     */
    private function sanitizeUrl(string $url, ?bool $isImage = null): string
    {
        // Do NOT add `javascript`, `data` or `vbscript` here
        // or else you will allow an XSS vulnerability!
        $protocolWhitelist = [
            'http',
            'https',
            'irc',
            'ftp',
            'sftp',
            'magnet',
        ];

        if (str_starts_with($url, '/')) {
            $url = config('app.url').'/'.ltrim($url, '/');
        } else {
            // `parse_url` yields null/false when there is no usable scheme. Schemes are
            // case-insensitive, so compare in lower case and strictly.
            $scheme = parse_url($url, PHP_URL_SCHEME);

            if (!\is_string($scheme) || !\in_array(strtolower($scheme), $protocolWhitelist, true)) {
                $url = 'https://'.$url;
            }
        }

        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return 'Broken link';
        }

        if ($isImage) {
            $whitelistedImageUrls = cache()->rememberForever(
                'whitelisted-image-urls',
                fn () => WhitelistedImageUrl::query()->pluck('pattern'),
            );

            $isWhitelisted = $whitelistedImageUrls->contains(function (string $pattern) use ($url): bool {
                // After quoting, `**` becomes "anything" and `*` becomes "anything except `/` and `.`".
                $pattern = str_replace(['\*\*', '\*'], ['.*', '[^\/.]*'], preg_quote($pattern, '/'));

                return preg_match('/^'.$pattern.'$/i', $url) === 1;
            });

            if (!$isWhitelisted) {
                $url = 'https://wsrv.nl/?n=-1&ll&url='.urlencode($url);
            }
        }

        return $url;
    }
}