Text helper for formatting and filtering text so that it is safe to output
Code taken from drupal filter module and anqh text class
Class declared in MODPATH/gleez/classes/gleez/text.php on line 13.
array $units - number units and text equivalents
array(31) ( 1000000000 => string(7) "billion" 1000000 => string(7) "million" 1000 => string(8) "thousand" 100 => string(7) "hundred" 90 => string(6) "ninety" 80 => string(6) "eighty" 70 => string(7) "seventy" 60 => string(5) "sixty" 50 => string(5) "fifty" 40 => string(6) "fourty" 30 => string(6) "thirty" 20 => string(6) "twenty" 19 => string(8) "nineteen" 18 => string(8) "eighteen" 17 => string(9) "seventeen" 16 => string(7) "sixteen" 15 => string(7) "fifteen" 14 => string(8) "fourteen" 13 => string(8) "thirteen" 12 => string(6) "twelve" 11 => string(6) "eleven" 10 => string(3) "ten" 9 => string(4) "nine" 8 => string(5) "eight" 7 => string(5) "seven" 6 => string(3) "six" 5 => string(4) "five" 4 => string(4) "four" 3 => string(5) "three" 2 => string(3) "two" 1 => string(3) "one" )
$_link_count integer 0
$_link_list string(0) ""
Converts text email addresses and anchors into links. Existing links will not be altered.
echo Text::auto_link($text);
This method is not foolproof since it uses regex to parse HTML.
string
$text
required - Text to auto linkunknown
$format
requiredunknown
$filter
/**
 * Input-filter callback that turns URLs and e-mail addresses into links
 * by delegating to Autolink::filter().
 *
 * @param   string  $text    Text to auto link
 * @param   mixed   $format  Required by the callback signature; not read here
 * @param   mixed   $filter  Required by the callback signature; not read here
 * @return  string  Documented as string; the value is whatever Autolink::filter() returns
 */
public static function autoLink($text, $format, $filter)
{
	$linked = Autolink::filter($text);

	return $linked;
}
Reverts auto_p
string
$str
/**
 * Reverts auto_p by dropping the break tag that sits directly in front of
 * a newline or carriage return.
 *
 * @param   string  $str  String to be processed
 * @return  string
 */
public static function auto_p_revert($str)
{
	// Detect which flavour of break tag the text uses
	if (preg_match('`<br>[\\n\\r]`', $str))
	{
		$tag = '<br>';
	}
	else
	{
		$tag = '<br />';
	}

	// Keep the line ending itself, drop only the tag
	$pattern = '`'.$tag.'([\\n\\r])`';

	return preg_replace($pattern, '$1', $str);
}
Automatically applies "p" and "br" markup to text.
echo Text::autop($text);
string
$text
required - Subjectunknown
$format
requiredunknown
$filter
/**
 * Automatically applies "p" and "br" markup to text.
 *
 *     echo Text::autop($text, $format, $filter);
 *
 * The contents of PRE, SCRIPT, STYLE, OBJECT and IFRAME elements and of
 * HTML comments are passed through untouched.
 *
 * @param   string  $text    Subject
 * @param   mixed   $format  Required by the filter callback signature; not read here
 * @param   mixed   $filter  Required by the filter callback signature; not read here
 * @return  string
 */
public static function autop($text, $format, $filter)
{
	// Standardize newlines
	$text = str_replace(array("\r\n", "\r"), "\n", $text);

	// Trim whitespace on each line
	$text = preg_replace('~^[ \t]+~m', '', $text);
	$text = preg_replace('~[ \t]+$~m', '', $text);

	// All block level tags
	$block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';

	// Split at opening and closing PRE, SCRIPT, STYLE, OBJECT, IFRAME tags
	// and comments. We don't apply any processing to the contents of these tags
	// to avoid messing up code. We look for matched pairs and allow basic
	// nesting. For example:
	// "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
	$chunks = preg_split('@(<!--.*?-->|</?(?:pre|script|style|object|iframe|!--)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

	// Note: PHP ensures the array consists of alternating delimiters and literals
	// and begins and ends with a literal (inserting NULL as required).
	$ignore = FALSE;
	$ignoretag = '';
	$output = '';

	foreach ($chunks as $i => $chunk)
	{
		if ($i % 2)
		{
			// Odd entries are the delimiters captured by preg_split()
			$comment = (substr($chunk, 0, 4) == '<!--');
			if ($comment)
			{
				// Nothing to do, this is a comment.
				$output .= $chunk;
				continue;
			}

			// Opening or closing tag?
			$open = ($chunk[1] != '/');
			list($tag) = preg_split('/[ >]/', substr($chunk, 2 - $open), 2);

			if (!$ignore)
			{
				if ($open)
				{
					$ignore = TRUE;
					$ignoretag = $tag;
				}
			}
			// Only allow a matching tag to close it.
			elseif (!$open && $ignoretag == $tag)
			{
				$ignore = FALSE;
				$ignoretag = '';
			}
		}
		elseif (!$ignore)
		{
			// just to make things a little easier, pad the end
			$chunk = preg_replace('|\n*$|', '', $chunk) . "\n\n";
			$chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
			$chunk = preg_replace('!(<' . $block . '[^>]*>)!', "\n$1", $chunk); // Space things out a little
			$chunk = preg_replace('!(</' . $block . '>)!', "$1\n\n", $chunk); // Space things out a little
			$chunk = preg_replace("/\n\n+/", "\n\n", $chunk); // take care of duplicates
			$chunk = preg_replace('/^\n|\n\s*\n$/', '', $chunk);

			// make paragraphs, including one at the end
			$chunk = '<p>' . preg_replace('/\n\s*\n\n?(.)/', "</p>\n<p>$1", $chunk) . "</p>\n";
			$chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk); // problem with nested lists
			$chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
			$chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);

			// under certain strange conditions it could create a P of entirely whitespace
			$chunk = preg_replace('|<p>\s*</p>\n?|', '', $chunk);
			$chunk = preg_replace('!<p>\s*(</?' . $block . '[^>]*>)!', "$1", $chunk);
			$chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*</p>!', "$1", $chunk);
			$chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk); // make line breaks
			$chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*<br />!', "$1", $chunk);
			$chunk = preg_replace('!<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);

			// Escape bare ampersands; the lookahead leaves existing named
			// entities alone and [^#] skips numeric ones. The replacement has
			// to be the entity "&amp;" - replacing "&" with "&" does nothing.
			$chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
		}

		$output .= $chunk;
	}

	return $output;
}
Returns a string with all whitespace converted to the replacement string (a dash "-" by default, or an underscore when an array map is passed as the replacement), accented characters converted to non-accented characters, and non word characters removed.
string
$string
required - The string you want to slugstring
$replacement
/**
 * Returns a slug-like string: accented characters are transliterated to
 * ASCII, every character that is not a letter, digit or whitespace is
 * dropped, and runs of whitespace become the replacement string.
 *
 * ASCII letters are lower-cased with strtolower() before any mapping.
 *
 * @param   string        $string       The string you want to slug
 * @param   string|array  $replacement  Replacement for whitespace (default "-").
 *                                      When an array is given it is used as an
 *                                      extra pattern => replacement map that is
 *                                      applied first, and "_" becomes the
 *                                      whitespace replacement.
 * @return  string
 */
public static function convert_accented_characters($string, $replacement = '-')
{
	$string = strtolower($string);

	// Transliteration table (pattern => ASCII equivalent). Order matters:
	// the two-letter German/ligature forms must win over the single letters.
	$foreign_characters = array(
		'/ä|æ|ǽ/' => 'ae',
		'/ö|œ/' => 'oe',
		'/ü/' => 'ue',
		'/Ä/' => 'Ae',
		'/Ü/' => 'Ue',
		'/Ö/' => 'Oe',
		'/À|Á|Â|Ã|Ä|Å|Ǻ|Ā|Ă|Ą|Ǎ/' => 'A',
		'/à|á|â|ã|å|ǻ|ā|ă|ą|ǎ|ª/' => 'a',
		'/Ç|Ć|Ĉ|Ċ|Č/' => 'C',
		'/ç|ć|ĉ|ċ|č/' => 'c',
		'/Ð|Ď|Đ/' => 'D',
		'/ð|ď|đ/' => 'd',
		'/È|É|Ê|Ë|Ē|Ĕ|Ė|Ę|Ě/' => 'E',
		'/è|é|ê|ë|ē|ĕ|ė|ę|ě/' => 'e',
		'/Ĝ|Ğ|Ġ|Ģ/' => 'G',
		'/ĝ|ğ|ġ|ģ/' => 'g',
		'/Ĥ|Ħ/' => 'H',
		'/ĥ|ħ/' => 'h',
		'/Ì|Í|Î|Ï|Ĩ|Ī|Ĭ|Ǐ|Į|İ/' => 'I',
		'/ì|í|î|ï|ĩ|ī|ĭ|ǐ|į|ı/' => 'i',
		'/Ĵ/' => 'J',
		'/ĵ/' => 'j',
		'/Ķ/' => 'K',
		'/ķ/' => 'k',
		'/Ĺ|Ļ|Ľ|Ŀ|Ł/' => 'L',
		'/ĺ|ļ|ľ|ŀ|ł/' => 'l',
		'/Ñ|Ń|Ņ|Ň/' => 'N',
		'/ñ|ń|ņ|ň|ŉ/' => 'n',
		'/Ò|Ó|Ô|Õ|Ō|Ŏ|Ǒ|Ő|Ơ|Ø|Ǿ/' => 'O',
		'/ò|ó|ô|õ|ō|ŏ|ǒ|ő|ơ|ø|ǿ|º/' => 'o',
		'/Ŕ|Ŗ|Ř/' => 'R',
		'/ŕ|ŗ|ř/' => 'r',
		'/Ś|Ŝ|Ş|Š/' => 'S',
		'/ś|ŝ|ş|š|ſ/' => 's',
		'/Ţ|Ť|Ŧ/' => 'T',
		'/ţ|ť|ŧ/' => 't',
		'/Ù|Ú|Û|Ũ|Ū|Ŭ|Ů|Ű|Ų|Ư|Ǔ|Ǖ|Ǘ|Ǚ|Ǜ/' => 'U',
		'/ù|ú|û|ũ|ū|ŭ|ů|ű|ų|ư|ǔ|ǖ|ǘ|ǚ|ǜ/' => 'u',
		'/Ý|Ÿ|Ŷ/' => 'Y',
		'/ý|ÿ|ŷ/' => 'y',
		'/Ŵ/' => 'W',
		'/ŵ/' => 'w',
		'/Ź|Ż|Ž/' => 'Z',
		'/ź|ż|ž/' => 'z',
		'/Æ|Ǽ/' => 'AE',
		'/ß/' => 'ss',
		'/Ĳ/' => 'IJ',
		'/ĳ/' => 'ij',
		'/Œ/' => 'OE',
		'/ƒ/' => 'f'
	);

	// An array argument is a caller-supplied map, not a replacement string
	$custom_map = array();
	if (is_array($replacement))
	{
		$custom_map = $replacement;
		$replacement = '_';
	}

	$quotedReplacement = preg_quote($replacement, '/');

	$merge = array(
		// Anything that is not whitespace, a letter or a decimal digit
		'/[^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}]/mu' => ' ',
		'/\\s+/' => $replacement,
		// Strip the replacement from both ends
		sprintf('/^[%s]+|[%s]+$/', $quotedReplacement, $quotedReplacement) => ''
	);

	// The caller's map goes first so its entries run before, and take
	// precedence over, the built-in rules.
	$map = $custom_map + $foreign_characters + $merge;

	return preg_replace(array_keys($map), array_values($map), $string);
}
Parses an HTML snippet and returns it as a DOM object.
This function loads the body part of a partial (X)HTML document and returns a full DOMDocument object that represents this document. You can use dom_serialize() to serialize this DOMDocument back to a XHTML snippet.
The partial (X)HTML snippet to load. Invalid mark-up will be corrected on import.
A DOMDocument that represents the loaded (X)HTML snippet.
string
$text
/**
 * Parses an HTML snippet and returns it as a DOM object.
 *
 * The snippet is wrapped in a minimal XHTML document that declares UTF-8,
 * so the returned document always has a body element holding the parsed
 * markup. Invalid mark-up is corrected by the parser on import.
 *
 * @param   string  $text  Text string to filter html
 * @return  DOMDocument
 */
public static function dom_load($text)
{
	// loadHTML() is an instance method; calling it statically is an error
	// on current PHP versions.
	$dom_document = new DOMDocument();

	// Ignore warnings during HTML soup loading.
	@$dom_document->loadHTML('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $text . '</body></html>');

	return $dom_document;
}
Empty paragraph killer: because users are sometimes overzealous with the return key. Multiple returns will not break the site's style.
When entering more than one carriage return, only the first will be honored.
The text to be checked or processed.
$text
$text
/**
 * Empty paragraph killer: removes paragraphs that contain nothing but
 * whitespace and/or non-breaking spaces.
 *
 * @param   string  $text  The text to be checked or processed
 * @return  string
 */
public static function emptyparagraph($text)
{
	// A paragraph counts as empty when it holds only whitespace, the
	// "&nbsp;" entity (semicolon optional) or a UTF-8 encoded U+00A0.
	return preg_replace('#<p[^>]*>(?:\s|&nbsp;?|\xC2\xA0)*</p>#', '', $text);
}
Core function to run all enabled filters by the format id on given string
/**
 * Core function to run all enabled filters of a format on the given text.
 *
 * Runs every enabled filter's "prepare callback" first, then every enabled
 * filter's "process callback", in order of filter weight.
 *
 * @param   object  $text  Object exposing ->text and ->format (markup() passes an
 *                         ArrayObject); ->format is replaced by the configured
 *                         default when it is missing or unknown
 * @return  string  The filtered text
 */
static function filters($text)
{
	$config = Kohana::$config->load('inputfilter');

	// Test isset() first: looking the format up before knowing it exists
	// reads an unset value.
	if ( ! isset($text->format) OR ! array_key_exists($text->format, $config->get('formats')))
	{
		// make sure a valid format id exists, if not set default format id
		$text->format = (int) $config->get('default_format', 1);
	}

	$filters = $config->formats[$text->format]['filters'];
	$filter_info = InputFilter::filters();

	// sort filters by weight
	$filters = Arr::array_sort($filters, 'weight');

	// Give filters the chance to escape HTML-like data such as code or formulas.
	foreach ($filters as $name => $filter)
	{
		if ($filter['status'] AND ! empty($filter_info[$name]['prepare callback']))
		{
			$text->text = InputFilter::callback(
				$filter_info[$name]['prepare callback'],
				$text->text, $text->format, $filter
			);
		}
	}

	// Perform filtering
	foreach ($filters as $name => $filter)
	{
		if ($filter['status'] AND ! empty($filter_info[$name]['process callback']))
		{
			$text->text = InputFilter::callback(
				$filter_info[$name]['process callback'],
				$text->text, $text->format, $filter
			);
		}
	}

	return $text->text;
}
Converts fractions to their html equivalent (for example, 1/4 will become ¼)
string
$text
/**
 * Converts common plain-text fractions to their single-glyph equivalents
 * (for example, 1/4 becomes ¼).
 *
 * @param   string  $text  String to be processed
 * @return  string
 */
public static function fractions($text)
{
	// Applied in this order, one fraction at a time
	$glyphs = array(
		'1/4' => '¼',
		'3/4' => '¾',
		'1/2' => '½',
		'1/3' => '⅓',
		'2/3' => '⅔',
		'1/8' => '⅛',
		'3/8' => '⅜',
		'5/8' => '⅝',
		'7/8' => '⅞',
	);

	foreach ($glyphs as $plain => $glyph)
	{
		$text = self::_replace_fraction($plain, $glyph, $text);
	}

	return $text;
}
Extract link URLs from HTML content.
string
$html
required - The HTMLboolean
$unique
/**
 * Extract link URLs from the href attribute of anchors in HTML content.
 *
 * Slashes are stripped from the input (stripslashes) before matching.
 *
 * @param   string   $html    The HTML
 * @param   boolean  $unique  Remove duplicate URLs?
 * @return  array    List of URLs in document order
 */
public static function get_urls($html, $unique = FALSE)
{
	$found = array();
	preg_match_all("/<a[^>]+href\s*=\s*[\"|']([^\s\"']+)[\"|'][^>]*>[^<]*<\/a>/i", stripslashes($html), $found);

	// Group 1 holds the href values
	$urls = $found[1];

	return $unique ? array_values(array_unique($urls)) : $urls;
}
/**
 * Wraps keyword matches in highlight spans.
 *
 * Each keyword is wrapped in <span class="highlight-partial">; when all
 * keywords occur in order, separated by single spaces, the whole phrase is
 * wrapped in <span class="highlight"> instead. Matching is case-insensitive.
 *
 * @param   string  $str       Text to highlight in
 * @param   string  $keywords  Space separated keywords
 * @return  string
 */
public static function highlight($str, $keywords)
{
	// Trim, strip tags, and collapse runs of whitespace to one space
	$keywords = preg_replace('/\s\s+/', ' ', strip_tags(trim($keywords)));

	// Partial matches: one span per keyword
	$spans = array();
	foreach (explode(' ', $keywords) as $word)
	{
		$span = '<span class="highlight-partial">'.$word.'</span>';
		$spans[] = $span;
		$str = str_ireplace($word, $span, $str);
	}

	// Full match: the sequence of partial spans becomes one span
	$sequence = implode(' ', $spans);

	return str_ireplace($sequence, '<span class="highlight">'.$keywords.'</span>', $str);
}
HTML filter. Provides filtering of input into accepted HTML.
/**
 * HTML filter. Provides filtering of input into accepted HTML.
 *
 * When the filter's "html_nofollow" setting is on, every anchor gets
 * rel="nofollow" and, if "url_length" is set, its text is shortened to
 * that many characters.
 *
 * @param   string  $text    Text to filter
 * @param   mixed   $format  Passed through to InputFilter::factory()
 * @param   array   $filter  Filter definition; reads $filter['settings']
 * @return  string  Trimmed, filtered HTML
 */
static function html($text, $format, $filter)
{
	$rendered = (string) InputFilter::factory($text, $format, $filter)->render();

	if ( ! $filter['settings']['html_nofollow'])
	{
		return trim($rendered);
	}

	$document = self::dom_load($rendered);

	foreach ($document->getElementsByTagName('a') as $anchor)
	{
		$anchor->setAttribute('rel', 'nofollow');

		// Shortens long URLs to http://www.example.com/long/url...
		if ($filter['settings']['url_length'])
		{
			$anchor->nodeValue = Text::limit_chars($anchor->nodeValue, (int) $filter['settings']['url_length'], '....');
		}
	}

	return trim(self::dom_serialize($document));
}
Scan input and make sure that all HTML tags are properly closed and nested.
string
$text
/**
 * Scan input and make sure that all HTML tags are properly closed and
 * nested, by round-tripping the text through dom_load() and dom_serialize().
 *
 * @param   string  $text  Text string to filter html
 * @return  string
 */
public static function htmlcorrector($text)
{
	$document = Text::dom_load($text);

	return Text::dom_serialize($document);
}
Adds a <span class="initial"> tag around the initial letter of each paragraph
string
$text
/**
 * Adds a <span class="initial"> tag around the initial letter of each
 * paragraph.
 *
 * Only applies right after an opening <p> tag (leading whitespace is
 * ignored). The first character must be a letter or digit and must be
 * followed by a letter, an apostrophe or whitespace.
 *
 * @param   string  $text  String to be processed
 * @return  string
 */
public static function initialcaps($text)
{
	$pattern = '/(<p[^>]*>\s*)([A-Z0-9])([A-Z\'\s]{1})/i';
	$wrapped = '$1<span class="initial">$2</span>$3';

	return preg_replace($pattern, $wrapped, $text);
}
Run all the enabled filters on a piece of text.
Note: Because filters can inject JavaScript or execute PHP code, security is vital here. When a user supplies a text format, you should validate it using filter_access() before accepting/using it. This is normally done in the validation stage of the Form API. You should for example never make a preview of content in a disallowed format.
The text to be filtered. The format id of the text to be filtered. If no format is assigned, the fallback format will be used. Optional: the language code of the text to be filtered, e.g. 'en' for English. This allows filters to be language aware so language specific text replacement can be implemented. Boolean whether to cache the filtered output in the {cache_filter} table. The caller may set this to FALSE when the output is already cached elsewhere to avoid duplicate cache lookups and storage.
$text
$text
required$format_id
$format_id
= bool FALSE$langcode
$langcode
= bool FALSE$cache
$cache
/**
 * Run all the enabled filters on a piece of text.
 *
 * @param   string       $text       The text to be filtered
 * @param   integer|bool $format_id  Format id; FALSE/NULL selects the configured default format
 * @param   string|bool  $langcode   Language code; FALSE/NULL selects I18n::$lang
 * @param   boolean      $cache      Whether to read/write the "cache_filter" cache
 * @return  string       The filtered text
 */
public static function markup($text, $format_id = FALSE, $langcode = FALSE, $cache = FALSE)
{
	$config = Kohana::$config->load('inputfilter');

	// The declared defaults are FALSE, and isset(FALSE) is TRUE, so an
	// isset() test never reaches the fallbacks. Treat FALSE and NULL alike
	// as "not supplied".
	$format_id = ($format_id === FALSE OR $format_id === NULL)
		? (int) $config->get('default_format', 1)
		: (int) $format_id;
	$langcode = ($langcode === FALSE OR $langcode === NULL) ? I18n::$lang : $langcode;

	// Check for a cached version of this piece of text.
	$cache_id = $format_id . ':' . $langcode . ':' . hash('sha256', $text);
	if ($cache AND $cached = Cache::instance('cache_filter')->get($cache_id))
	{
		return $cached;
	}

	// Convert all Windows and Mac newlines to a single newline, so filters
	// only need to deal with one possibility.
	$text = str_replace(array("\r\n", "\r"), "\n", $text);

	// Drop the teaser break marker
	$text = str_replace('<!--break-->', '', $text);

	$textObj = new ArrayObject(array(
		'text'     => (string) $text,
		'format'   => (int) $format_id,
		'langcode' => (string) $langcode,
		'cache'    => (bool) $cache,
		'cache_id' => (string) $cache_id
	), ArrayObject::ARRAY_AS_PROPS);

	// Let modules alter the text object before the filters run
	Module::event('inputfilter', $textObj);

	$text = (is_string($textObj->text)) ? $textObj->text : $text;
	$text = self::filters($textObj); //run all filters

	// Store in cache with a minimum expiration time of 1 day.
	if ($cache)
	{
		Cache::instance('cache_filter')->set($cache_id, $text, null, time() + (60 * 60 * 24));
	}

	return $text;
}
/**
 * Replaces anchors with their text plus a numbered reference and appends
 * the collected link list to the end of the text.
 *
 * @param   string   $text        Text containing HTML anchors
 * @param   boolean  $auto_links  Run Text::auto_link() on the text first?
 * @return  string
 */
public static function move_links_to_end($text, $auto_links = FALSE)
{
	// The /e (eval) modifier no longer exists in PHP 7+, so the rewrite is
	// done with a callback instead.
	$search = '/<a [^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/i';

	if ($auto_links)
	{
		$text = Text::auto_link($text);
	}

	$text = preg_replace_callback($search, function ($matches) {
		// $matches[1] is the href, $matches[2] the anchor text
		return self::_links_list($matches[1], $matches[2]);
	}, $text);

	// Add link list
	if ( ! empty(self::$_link_list))
	{
		$text .= __("\n\nLinks:\n") . self::$_link_list;
	}

	//reset these vars to defaults
	self::$_link_list = '';
	self::$_link_count = 0;

	return $text;
}
Replace runs of multiple whitespace characters with a single space.
string
$string
/**
 * Replace runs of multiple whitespace characters with a single space and
 * trim the result. Empty values are returned untouched.
 *
 * @param   string  $string  The string to normalize
 * @return  string
 */
public static function normalize_spaces($string)
{
	if (empty($string))
	{
		return $string;
	}

	$collapsed = preg_replace('/[\s\n\r\t]+/', ' ', $string);

	return UTF8::trim($collapsed);
}
Adds <span class="ordinal"> tags around any ordinals (nd / st / th / rd)
string
$text
/**
 * Adds <span class="ordinal"> tags around any ordinals (nd / st / th / rd).
 *
 * One or more digits must precede the suffix, and the suffix must be
 * followed by at least one character that is not a letter or digit.
 *
 * @param   string  $text  String to be processed
 * @return  string
 */
public static function ordinals($text)
{
	$pattern = '/([0-9]+)(nd|st|th|rd)([^a-zA-Z0-9]+)/';
	$wrapped = '$1<span class="ordinal">$2</span>$3';

	return preg_replace($pattern, $wrapped, $text);
}
Encode special characters in a plain-text string for display as HTML.
Also validates strings as UTF-8 to prevent cross site scripting attacks on Internet Explorer 6.
The text to be checked or processed.
An HTML safe version of $text, or an empty string if $text is not valid UTF-8.
$text
$text
/**
 * Encode special characters in a plain-text string for display as HTML.
 * Thin wrapper around HTML::chars().
 *
 * @param   string  $text  The text to be checked or processed
 * @return  string  Whatever HTML::chars() returns for $text
 */
public static function plain($text)
{
	$escaped = HTML::chars($text);

	return $escaped;
}
Standardize newlines
string
$value
/**
 * Standardize newlines: "\r\n" and lone "\r" both become "\n".
 *
 * @param   string  $value  The value
 * @return  string
 */
public static function standardize($value)
{
	// Nothing to do when there is no carriage return at all
	if (strpos($value, "\r") === FALSE)
	{
		return $value;
	}

	return str_replace(array("\r\n", "\r"), "\n", $value);
}
Alternates between two or more strings.
echo Text::alternate('one', 'two'); // "one"
echo Text::alternate('one', 'two'); // "two"
echo Text::alternate('one', 'two'); // "one"
Note that using multiple iterations of different strings may produce unexpected results.
/**
 * Alternates between two or more strings.
 *
 *     echo Text::alternate('one', 'two'); // "one"
 *     echo Text::alternate('one', 'two'); // "two"
 *     echo Text::alternate('one', 'two'); // "one"
 *
 * Calling it without arguments resets the internal counter and returns an
 * empty string. The counter is shared by all callers, so interleaving
 * different argument lists may produce unexpected results.
 *
 * @return  string
 */
public static function alternate()
{
	// Call counter, kept between invocations
	static $i;

	$choices = func_get_args();

	if (count($choices) === 0)
	{
		$i = 0;
		return '';
	}

	$index = $i % count($choices);
	$i++;

	return $choices[$index];
}
Converts text email addresses and anchors into links. Existing links will not be altered.
echo Text::auto_link($text);
This method is not foolproof since it uses regex to parse HTML.
string
$text
/**
 * Converts text email addresses and anchors into links. Existing links
 * will not be altered.
 *
 *     echo Text::auto_link($text);
 *
 * This method is not foolproof since it uses regex to parse HTML.
 *
 * @param   string  $text  Text to auto link
 * @return  string
 */
public static function auto_link($text)
{
	// Emails go first to prevent problems with "www.domain.com@example.com"
	$with_emails = Text::auto_link_emails($text);

	return Text::auto_link_urls($with_emails);
}
Converts text email addresses into links. Existing links will not be altered.
echo Text::auto_link_emails($text);
This method is not foolproof since it uses regex to parse HTML.
string
$text
/**
 * Converts text email addresses into links. Existing links will not be
 * altered.
 *
 *     echo Text::auto_link_emails($text);
 *
 * This method is not foolproof since it uses regex to parse HTML.
 *
 * @param   string  $text  Text to auto link
 * @return  string
 */
public static function auto_link_emails($text)
{
	// Matches addresses that are not already part of a mailto anchor.
	// The "58;" lookbehind also skips anchors whose colon is written as a
	// numeric entity (&#58;, &#058;, &#0058; and so on).
	$pattern = '~\b(?<!href="mailto:|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b(?!</a>)~i';

	return preg_replace_callback($pattern, 'Text::_auto_link_emails_callback', $text);
}
Converts text anchors into links. Existing links will not be altered.
echo Text::auto_link_urls($text);
This method is not foolproof since it uses regex to parse HTML.
string
$text
/**
 * Converts text anchors into links. Existing links will not be altered.
 *
 *     echo Text::auto_link_urls($text);
 *
 * This method is not foolproof since it uses regex to parse HTML.
 *
 * @param   string  $text  Text to auto link
 * @return  string
 */
public static function auto_link_urls($text)
{
	// Pass 1: http/https/ftp/ftps URLs that are not already inside an anchor
	$with_scheme = '~\b(?<!href="|">)(?:ht|f)tps?://[^<\s]+(?:/|\b)~i';
	$text = preg_replace_callback($with_scheme, 'Text::_auto_link_urls_callback1', $text);

	// Pass 2: naked www.links.com (without http://)
	$naked_www = '~\b(?<!://|">)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}[^<\s]*\b~i';

	return preg_replace_callback($naked_www, 'Text::_auto_link_urls_callback2', $text);
}
Automatically applies "p" and "br" markup to text. Basically nl2br on steroids.
echo Text::auto_p($text);
This method is not foolproof since it uses regex to parse HTML.
string
$str
required - Subjectboolean
$br
/**
 * Automatically applies "p" and "br" markup to text.
 *
 *     echo Text::auto_p($text);
 *
 * Blank lines separate paragraphs. Block-level elements listed in $no_p
 * are not wrapped in <p> tags. This method is not foolproof since it uses
 * regex to parse HTML.
 *
 * @param   string   $str  Subject
 * @param   boolean  $br   Convert single linebreaks to <br /> (default TRUE)
 * @return  string   Empty string when $str is empty after trimming
 */
public static function auto_p($str, $br = TRUE)
{
// Trim whitespace
if (($str = trim($str)) === '')
return '';
// Standardize newlines
$str = str_replace(array("\r\n", "\r"), "\n", $str);
// Trim whitespace on each line
$str = preg_replace('~^[ \t]+~m', '', $str);
$str = preg_replace('~[ \t]+$~m', '', $str);
// The following regexes only need to be executed if the string contains html
// ($html_found is reused further down to guard the matching clean-up step)
if ($html_found = (strpos($str, '<') !== FALSE))
{
// Elements that should not be surrounded by p tags
$no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';
// Put at least two linebreaks before and after $no_p elements
// (only tags at the start / end of a line are affected, because of the m flag)
$str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
$str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
}
// Do the <p> magic!
// Wrap everything in one paragraph, then split it wherever 2+ newlines occur
$str = '<p>'.trim($str).'</p>';
$str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);
// The following regexes only need to be executed if the string contains html
if ($html_found !== FALSE)
{
// Remove p tags around $no_p elements
$str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
$str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
}
// Convert single linebreaks to <br />
// (a newline with no other newline directly before or after it)
if ($br === TRUE)
{
$str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);
}
return $str;
}
Returns human readable sizes. Based on original functions written by Aidan Lister and Quentin Zervaas.
echo Text::bytes(filesize($file));
integer
$bytes
required - Size in bytesstring
$force_unit
= NULL - A definitive unitstring
$format
= NULL - The return string formatboolean
$si
/**
 * Returns human readable sizes.
 *
 *     echo Text::bytes(filesize($file));
 *
 * @param   integer  $bytes       Size in bytes
 * @param   string   $force_unit  A definitive unit (e.g. "MB" or "MiB"); NULL picks one automatically
 * @param   string   $format      The sprintf() format; defaults to '%01.2f %s'
 * @param   boolean  $si          Whether to use SI prefixes (1000) or IEC (1024)
 * @return  string
 */
public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
{
	// Format string
	$format = ($format === NULL) ? '%01.2f %s' : (string) $format;

	// Normalise once so NULL is never handed to a string function
	$force_unit = (string) $force_unit;

	// IEC prefixes (binary)
	if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
	{
		$units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
		$mod   = 1024;
	}
	// SI prefixes (decimal)
	else
	{
		$units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
		$mod   = 1000;
	}

	// Determine unit to use
	if (($power = array_search($force_unit, $units, TRUE)) === FALSE)
	{
		$power = ($bytes > 0) ? (int) floor(log($bytes, $mod)) : 0;

		// Stay inside the unit table: values below one byte and values
		// beyond the largest prefix would otherwise index a missing entry.
		$power = max(0, min($power, count($units) - 1));
	}

	return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
}
Replaces the given words with a string.
// Displays "What the #####, man!"
echo Text::censor('What the frick, man!', array(
'frick' => '#####',
));
string
$str
required - Phrase to replace words inarray
$badwords
required - Words to replacestring
$replacement
= string(1) "#" - Replacement stringboolean
$replace_partial_words
/**
 * Replaces the given words with a string.
 *
 * A "*" inside a bad word acts as a wildcard for non-whitespace characters.
 * When $replacement is a single character it is repeated to the length of
 * each matched word; otherwise it replaces the word as-is.
 *
 * @param   string        $str                    Phrase to replace words in
 * @param   array|string  $badwords               Words to replace
 * @param   string        $replacement            Replacement string
 * @param   boolean       $replace_partial_words  Replace words across word boundaries (space, period, etc)
 * @return  string
 */
public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = TRUE)
{
	// Normalise first so a single word passed as a string is handled too
	$badwords = (array) $badwords;

	foreach ($badwords as $key => $badword)
	{
		// Quote for the regex, then turn the quoted "*" back into a wildcard
		$badwords[$key] = str_replace('\*', '\S*?', preg_quote( (string) $badword));
	}

	$regex = '('.implode('|', $badwords).')';

	if ($replace_partial_words === FALSE)
	{
		// Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
		$regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
	}

	$regex = '!'.$regex.'!ui';

	if (UTF8::strlen($replacement) == 1)
	{
		// The /e (eval) modifier no longer exists in PHP 7+; a callback
		// does the same job without evaluating matched text as code.
		return preg_replace_callback($regex, static function ($matches) use ($replacement) {
			return str_repeat($replacement, UTF8::strlen($matches[1]));
		}, $str);
	}

	return preg_replace($regex, $replacement, $str);
}
Limits a phrase to a given number of characters.
$text = Text::limit_chars($text);
string
$str
required - Phrase to limit characters ofinteger
$limit
= integer 100 - Number of characters to limit tostring
$end_char
= NULL - End character or entityboolean
$preserve_words
/**
 * Limits a phrase to a given number of characters.
 *
 *     $text = Text::limit_chars($text);
 *
 * @param   string   $str             Phrase to limit characters of
 * @param   integer  $limit           Number of characters to limit to
 * @param   string   $end_char        End character or entity; NULL uses "…"
 * @param   boolean  $preserve_words  Enable or disable the preservation of words while limiting
 * @return  string
 */
public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
{
	if ($end_char === NULL)
	{
		$end_char = '…';
	}

	$limit = (int) $limit;

	// Blank or already short enough: nothing to cut
	if (trim($str) === '' OR UTF8::strlen($str) <= $limit)
	{
		return $str;
	}

	if ($limit <= 0)
	{
		return $end_char;
	}

	// Hard cut in the middle of a word is allowed
	if ($preserve_words === FALSE)
	{
		return rtrim(UTF8::substr($str, 0, $limit)).$end_char;
	}

	// Preserve words: cut at the last whitespace within the limit. The limit
	// is the top limit, so with no whitespace in range only the end
	// character is returned.
	if ( ! preg_match('/^.{0,'.$limit.'}\s/us', $str, $matches))
	{
		return $end_char;
	}

	$suffix = (strlen($matches[0]) === strlen($str)) ? '' : $end_char;

	return rtrim($matches[0]).$suffix;
}
Limits a phrase to a given number of words.
$text = Text::limit_words($text);
string
$str
required - Phrase to limit words ofinteger
$limit
= integer 100 - Number of words to limit tostring
$end_char
/**
 * Limits a phrase to a given number of words.
 *
 *     $text = Text::limit_words($text);
 *
 * @param   string   $str       Phrase to limit words of
 * @param   integer  $limit     Number of words to limit to
 * @param   string   $end_char  End character or entity; NULL uses "…"
 * @return  string
 */
public static function limit_words($str, $limit = 100, $end_char = NULL)
{
	$limit = (int) $limit;

	if ($end_char === NULL)
	{
		$end_char = '…';
	}

	if (trim($str) === '')
	{
		return $str;
	}

	if ($limit <= 0)
	{
		return $end_char;
	}

	preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches);
	$head = $matches[0];

	// The end character is attached only when something was actually cut off
	if (strlen($head) === strlen($str))
	{
		return rtrim($head);
	}

	return rtrim($head).$end_char;
}
Format a number to human-readable text.
// Display: one thousand and twenty-four
echo Text::number(1024);
// Display: five million, six hundred and thirty-two
echo Text::number(5000632);
integer
$number
/**
 * Format a number to human-readable text.
 *
 *     // Display: one thousand and twenty-four
 *     echo Text::number(1024);
 *
 *     // Display: five million, six hundred and thirty-two
 *     echo Text::number(5000632);
 *
 * Walks Text::$units and peels off each unit that fits into the remaining
 * number. NOTE(review): this relies on Text::$units being ordered from the
 * largest unit to the smallest - confirm against the property definition.
 *
 * @param   integer  $number  Number to format
 * @return  string   Empty string when no unit fits (e.g. for 0)
 */
public static function number($number)
{
// The number must always be an integer
$number = (int) $number;
// Uncompiled text version
$text = array();
// Last matched unit within the loop
$last_unit = NULL;
// The last matched item within the loop
$last_item = '';
foreach (Text::$units as $unit => $name)
{
if ($number / $unit >= 1)
{
// $value = the number of times the number is divisible by unit;
// the matched amount is subtracted from the remaining number
$number -= $unit * ($value = (int) floor($number / $unit));
// Temporary var for textifying the current unit
$item = '';
if ($unit < 100)
{
// A unit below ten following a tens unit (20-90) is hyphenated
// onto the previous item, e.g. "twenty" + "three"
if ($last_unit < 100 AND $last_unit >= 20)
{
$last_item .= '-'.$name;
}
else
{
$item = $name;
}
}
else
{
// Hundreds and above: spell out the multiplier recursively
$item = Text::number($value).' '.$name;
}
// In the situation that we need to make a composite number (i.e. twenty-three)
// then we need to modify the previous entry
if (empty($item))
{
array_pop($text);
$item = $last_item;
}
$last_item = $text[] = $item;
$last_unit = $unit;
}
}
// With more than one part, the final part is joined with "and"
if (count($text) > 1)
{
$and = array_pop($text);
}
$text = implode(', ', $text);
if (isset($and))
{
$text .= ' and '.$and;
}
return $text;
}
Generates a random string of a given type and length.
$str = Text::random(); // 8 character random string
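The following types are supported: alnum (digits plus lower and upper case letters), alpha (lower and upper case letters), hexdec (0-9 and a-f), numeric (0-9), nozero (1-9), distinct (2345679ACDEFHJKLMNPRSTUVWXYZ).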
You can also create a custom type by providing the "pool" of characters as the type.
string
$type
= NULL - A type of pool, or a string of characters to use as the poolinteger
$length
/**
 * Generates a random string of a given type and length.
 *
 *     $str = Text::random(); // 8 character random string
 *
 * Supported types: alnum, alpha, hexdec, numeric, nozero, distinct. Any
 * other value is used as the pool of characters to pick from.
 *
 * Characters are picked with mt_rand(), so the result is not suitable for
 * security-sensitive tokens.
 *
 * @param   string   $type    A type of pool, or a string of characters to use as the pool
 * @param   integer  $length  Length of string to return
 * @return  string
 */
public static function random($type = NULL, $length = 8)
{
	// Default is to generate an alphanumeric string
	if ($type === NULL)
	{
		$type = 'alnum';
	}

	$named_pools = array(
		'alnum'    => '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
		'alpha'    => 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
		'hexdec'   => '0123456789abcdef',
		'numeric'  => '0123456789',
		'nozero'   => '123456789',
		'distinct' => '2345679ACDEFHJKLMNPRSTUVWXYZ',
	);

	$utf8 = FALSE;
	$pool = NULL;

	// Loose comparison, tried in declaration order
	foreach ($named_pools as $name => $characters)
	{
		if ($type == $name)
		{
			$pool = $characters;
			break;
		}
	}

	if ($pool === NULL)
	{
		// Custom pool supplied by the caller
		$pool = (string) $type;
		$utf8 = ! UTF8::is_ascii($pool);
	}

	// Split the pool into an array of characters
	if ($utf8 === TRUE)
	{
		$pool = UTF8::str_split($pool, 1);
	}
	else
	{
		$pool = str_split($pool, 1);
	}

	// Largest pool key
	$max = count($pool) - 1;

	$str = '';
	for ($n = 0; $n < $length; $n++)
	{
		// Select a random character from the pool and add it to the string
		$str .= $pool[mt_rand(0, $max)];
	}

	// Make sure alnum strings contain at least one letter and one digit
	if ($type === 'alnum' AND $length > 1)
	{
		if (ctype_alpha($str))
		{
			// Overwrite a random position with a random digit
			$str[mt_rand(0, $length - 1)] = chr(mt_rand(48, 57));
		}
		elseif (ctype_digit($str))
		{
			// Overwrite a random position with a random upper case letter
			$str[mt_rand(0, $length - 1)] = chr(mt_rand(65, 90));
		}
	}

	return $str;
}
Reduces multiple slashes in a string to single slashes.
$str = Text::reduce_slashes('foo//bar/baz'); // "foo/bar/baz"
string
$str
/**
 * Reduces multiple slashes in a string to single slashes. Slashes that
 * directly follow a colon (as in "http://") are left alone.
 *
 *     $str = Text::reduce_slashes('foo//bar/baz'); // "foo/bar/baz"
 *
 * @param   string  $str  String to reduce slashes of
 * @return  string
 */
public static function reduce_slashes($str)
{
	$repeated_slashes = '#(?<!:)//+#';

	return preg_replace($repeated_slashes, '/', $str);
}
Finds the text that is similar between a set of words.
$match = Text::similar(array('fred', 'fran', 'free')); // "fr"
array
$words
/**
 * Finds the leading text that all of the given words have in common.
 *
 *     $match = Text::similar(array('fred', 'fran', 'free')); // "fr"
 *
 * @param   array  $words  Words to find similar text of
 * @return  string
 */
public static function similar(array $words)
{
	// Every word is compared against the first one
	$reference = current($words);
	$length = strlen($reference);

	$shared = 0;
	while ($shared < $length)
	{
		foreach ($words as $candidate)
		{
			// Stop counting at the first position where a word is too
			// short or differs
			if ( ! isset($candidate[$shared]) OR $candidate[$shared] !== $reference[$shared])
				break 2;
		}

		++$shared;
	}

	return substr($reference, 0, $shared);
}
Uppercase words that are not separated by spaces, using a custom delimiter or the default.
$str = Text::ucfirst('content-type'); // returns "Content-Type"
string
$string
required - String to transformstring
$delimiter
/**
 * Uppercases the first letter of every part of a string, where parts are
 * separated by a custom delimiter instead of spaces.
 *
 *     $str = Text::ucfirst('content-type'); // returns "Content-Type"
 *
 * @param   string  $string     String to transform
 * @param   string  $delimiter  Delimiter to use
 * @return  string
 */
public static function ucfirst($string, $delimiter = '-')
{
	$parts = explode($delimiter, $string);

	foreach ($parts as $position => $part)
	{
		$parts[$position] = ucfirst($part);
	}

	return implode($delimiter, $parts);
}
Prevents widow words by inserting a non-breaking space between the last two words.
echo Text::widont($text);
string
$str
/**
 * Prevents widow words by replacing the last space in the text with a
 * non-breaking space entity.
 *
 *     echo Text::widont($text);
 *
 * @param   string  $str  Text to remove widows from
 * @return  string
 */
public static function widont($str)
{
	$str = rtrim($str);
	$space = strrpos($str, ' ');

	if ($space !== FALSE)
	{
		// Must be the entity: swapping the space for another plain space
		// would leave the text unchanged.
		$str = substr($str, 0, $space).'&nbsp;'.substr($str, $space + 1);
	}

	return $str;
}
/**
 * preg_replace_callback() handler for auto_link_emails(): turns the
 * matched address into a mailto link via HTML::mailto().
 *
 * @param   array  $matches  Regex matches; index 0 is the full address
 * @return  string
 */
protected static function _auto_link_emails_callback($matches)
{
	$address = $matches[0];

	return HTML::mailto($address);
}
/**
 * preg_replace_callback() handler for auto_link_urls(): wraps a matched
 * URL that already carries a scheme in an anchor via HTML::anchor().
 *
 * @param   array  $matches  Regex matches; index 0 is the full URL
 * @return  string
 */
protected static function _auto_link_urls_callback1($matches)
{
	$url = $matches[0];

	return HTML::anchor($url);
}
/**
 * preg_replace_callback() handler for auto_link_urls(): links a naked
 * "www." address, prefixing "http://" for the target while keeping the
 * original text as the link title.
 *
 * @param   array  $matches  Regex matches; index 0 is the matched address
 * @return  string
 */
protected static function _auto_link_urls_callback2($matches)
{
	$shown = $matches[0];
	$target = 'http://'.$shown;

	return HTML::anchor($target, $shown);
}
Adds comments around the <!CDATA section in a dom element.
DOMDocument::loadHTML in filter_dom_load() makes CDATA sections from the contents of inline script and style tags. This can cause HTML 4 browsers to throw exceptions.
This function attempts to solve the problem by creating a DocumentFragment, commenting the CDATA tag.
The DOMDocument containing the $dom_element. The element potentially containing a CDATA node. String to use as a comment start marker to escape the CDATA declaration. String to use as a comment end marker to escape the CDATA declaration.
$dom_document
$dom_document
required$dom_element
$dom_element
required$comment_start
$comment_start
= string(2) "//"$comment_end
$comment_end
/**
 * Adds comments around the CDATA section(s) in a DOM element.
 *
 * Each CDATA child of $dom_element is replaced by a document fragment in
 * which the CDATA markers are wrapped in comment markers.
 *
 * @param   DOMDocument  $dom_document   The DOMDocument containing the $dom_element
 * @param   DOMElement   $dom_element    The element potentially containing a CDATA node
 * @param   string       $comment_start  Comment start marker used to escape the CDATA declaration (default "//")
 * @param   string       $comment_end    Comment end marker used to escape the CDATA declaration (default "")
 * @return  void
 */
private static function _escape_cdata_element($dom_document, $dom_element, $comment_start = '//', $comment_end = '') {
// NOTE(review): nodes are appended to and removed from $dom_element while
// its childNodes list is being iterated - confirm this is safe for
// elements with more than one child node.
foreach ($dom_element->childNodes as $node)
{
if (get_class($node) == 'DOMCdataSection')
{
// Commented-out opening and closing CDATA markers
$embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
$embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";
// Build the replacement as a fragment holding the original data
$fragment = $dom_document->createDocumentFragment();
$fragment->appendXML($embed_prefix . $node->data . $embed_suffix);
// Add the escaped copy, then drop the original CDATA node
$dom_element->appendChild($fragment);
$dom_element->removeChild($node);
}
}
}
/**
 * Records one link for move_links_to_end() and returns the text that
 * replaces the anchor.
 *
 * Links starting with http://, https:// or mailto: are listed verbatim.
 * Links starting with javascript: are not listed. Every other link is
 * listed with URL::site(null, TRUE) in front of it.
 *
 * @param   string  $link     The anchor's href value
 * @param   string  $display  The anchor's text
 * @return  string  $display, followed by a numbered <sup> reference when the link was listed
 */
private static function _links_list( $link, $display )
{
	$is_absolute = (substr($link, 0, 7) == 'http://' OR substr($link, 0, 8) == 'https://' OR
		substr($link, 0, 7) == 'mailto:');

	// Don't count javascript links; ignore them
	if ( ! $is_absolute AND substr($link, 0, 11) == 'javascript:')
	{
		return $display . '';
	}

	self::$_link_count++;
	$entry = "[" . self::$_link_count . "] ";

	if ( ! $is_absolute)
	{
		// Site-relative link: prefix the site URL, adding a slash if the
		// link does not start with one
		$entry .= URL::site(null, TRUE);

		if (substr($link, 0, 1) != '/')
		{
			$entry .= '/';
		}
	}

	$entry .= "$link\n";
	self::$_link_list .= $entry;

	return $display . ' <sup>[' . self::$_link_count . ']</sup>';
}
/**
 * Replaces one plain-text fraction with its glyph wherever it is
 * surrounded on both sides by at least one character that is neither a
 * digit nor a letter.
 *
 * @param   string  $fraction       Fraction to look for, e.g. "1/4"
 * @param   string  $html_fraction  Text to put in its place
 * @param   string  $text           Text to process
 * @return  string
 */
private static function _replace_fraction($fraction, $html_fraction, $text)
{
	// The fraction can't be preceded or followed by a number or letter
	$boundary = '([^0-9A-Z]+)';
	$pattern = '/' . $boundary . preg_quote($fraction, '/') . $boundary . '/i';

	return preg_replace($pattern, '$1' . $html_fraction . '$2', $text);
}
Converts a DOM object back to an HTML snippet.
The function serializes the body part of a DOMDocument back to an XHTML snippet.
The resulting XHTML snippet will be properly formatted to be compatible with HTML user agents.
A DOMDocument object to serialize, only the tags below the first <body> node will be converted.
A valid (X)HTML snippet, as a string.
$dom_document
$dom_document
/**
 * Converts a DOM object back to an HTML snippet.
 *
 * Serializes the children of the document's first body element as XML,
 * after escaping CDATA sections inside script and style elements, and
 * adds a space before the slash of self-closing tags.
 *
 * @param   DOMDocument  $dom_document  Document to serialize
 * @return  string       The snippet, or an empty string when the document has no body element
 */
private static function dom_serialize($dom_document)
{
	$body_node = $dom_document->getElementsByTagName('body')->item(0);
	$body_content = '';

	// Nothing to serialize without a body element
	if ($body_node === NULL)
	{
		return $body_content;
	}

	foreach ($body_node->getElementsByTagName('script') as $node)
	{
		self::_escape_cdata_element($dom_document, $node);
	}

	foreach ($body_node->getElementsByTagName('style') as $node)
	{
		self::_escape_cdata_element($dom_document, $node, '/*', '*/');
	}

	foreach ($body_node->childNodes as $child_node)
	{
		$body_content .= $dom_document->saveXML($child_node);
	}

	// "<br/>" becomes "<br />"
	return preg_replace('|<([^> ]*)/>|i', '<$1 />', $body_content);
}