Modules

abstract Gleez_Text
extends Kohana_Text

Text helper for formatting and sanitizing text so that it is safe to output

Code taken from drupal filter module and anqh text class

package
Gleez
category
Text
author
Sandeep Sangamreddi - Gleez
copyright
© 2012 Gleez Technologies
license
http://gleezcms.org/license

Class declared in MODPATH/gleez/classes/gleez/text.php on line 13.

Properties

public static array $units

number units and text equivalents

array(31) (
    1000000000 => string(7) "billion"
    1000000 => string(7) "million"
    1000 => string(8) "thousand"
    100 => string(7) "hundred"
    90 => string(6) "ninety"
    80 => string(6) "eighty"
    70 => string(7) "seventy"
    60 => string(5) "sixty"
    50 => string(5) "fifty"
    40 => string(6) "fourty"
    30 => string(6) "thirty"
    20 => string(6) "twenty"
    19 => string(8) "nineteen"
    18 => string(8) "eighteen"
    17 => string(9) "seventeen"
    16 => string(7) "sixteen"
    15 => string(7) "fifteen"
    14 => string(8) "fourteen"
    13 => string(8) "thirteen"
    12 => string(6) "twelve"
    11 => string(6) "eleven"
    10 => string(3) "ten"
    9 => string(4) "nine"
    8 => string(5) "eight"
    7 => string(5) "seven"
    6 => string(3) "six"
    5 => string(4) "five"
    4 => string(4) "four"
    3 => string(5) "three"
    2 => string(3) "two"
    1 => string(3) "one"
)
integer 0
string(0) ""

Methods

Converts text email addresses and anchors into links. Existing links will not be altered.

echo Text::auto_link($text);

This method is not foolproof since it uses regex to parse HTML.

Parameters

  • string $text required - Text to auto link
  • unknown $format required
  • unknown $filter required

Tags

Return Values

  • string

Source Code

/**
 * Hands the text to Autolink::filter() and returns the result.
 *
 * The $format and $filter arguments are accepted but never read by
 * this method.
 *
 * @param   string  $text    Text to auto link
 * @param   mixed   $format  Unused here
 * @param   mixed   $filter  Unused here
 * @return  string  Result of Autolink::filter() (presumably the linked text)
 */
public static function autoLink($text, $format, $filter)
{
	$linked = Autolink::filter($text);

	return $linked;
}

public static auto_p_revert( string $str ) (defined in Gleez_Text)

Reverts auto_p

Parameters

  • string $str required - String to be processed

Tags

  • Static -

Return Values

  • string

Source Code

/**
 * Removes the line break tags that sit directly in front of a newline.
 *
 * If the input contains at least one "<br>" followed by a newline or
 * carriage return, "<br>" tags are stripped; otherwise "<br />" tags are.
 * The newline character itself is kept.
 *
 * @param   string  $str  String to be processed
 * @return  string
 */
public static function auto_p_revert($str)
{
	// Decide which flavour of the tag the input uses
	if (preg_match('`<br>[\\n\\r]`', $str))
	{
		$tag = '<br>';
	}
	else
	{
		$tag = '<br />';
	}

	$pattern = '`'.$tag.'([\\n\\r])`';

	return preg_replace($pattern, '$1', $str);
}

public static autop( string $text , $format , $filter ) (defined in Gleez_Text)

Automatically applies "p" and "br" markup to text.

echo Text::autop($text);

Parameters

  • string $text required - Subject
  • unknown $format required
  • unknown $filter required

Tags

  • See - http://api.drupal.org/api/drupal/modules--filter--filter.module/function/_filter_autop

Return Values

  • string

Source Code

/**
 * Automatically applies "p" and "br" markup to text.
 *
 * The text is split on PRE, SCRIPT, STYLE, OBJECT and IFRAME tags and on
 * HTML comments. Content between a matched opening/closing pair of those
 * tags is copied to the output untouched; every other chunk is run through
 * a fixed sequence of regular expressions that insert paragraph and line
 * break tags. The order of those expressions matters.
 *
 * The $format and $filter arguments are not read by this method.
 *
 * @param   string  $text    Subject
 * @param   mixed   $format  Unused here
 * @param   mixed   $filter  Unused here
 * @return  string
 */
public static function autop($text, $format, $filter)
{
	// Standardize newlines
	$text = str_replace(array("\r\n", "\r"), "\n", $text);

	// Trim whitespace on each line
	$text = preg_replace('~^[ \t]+~m', '', $text);
	$text = preg_replace('~[ \t]+$~m', '', $text);

	// All block level tags
	$block = 	'(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';

	// Split at opening and closing PRE, SCRIPT, STYLE, OBJECT, IFRAME tags
	// and comments. We don't apply any processing to the contents of these tags
	// to avoid messing up code. We look for matched pairs and allow basic
	// nesting. For example:
	// "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
	$chunks = preg_split('@(<!--.*?-->|</?(?:pre|script|style|object|iframe|!--)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
	// Note: PHP ensures the array consists of alternating delimiters and literals
	// and begins and ends with a literal (inserting NULL as required).
	$ignore = FALSE;
	$ignoretag = '';
	$output = '';

	foreach ($chunks as $i => $chunk)
	{
		// Odd indexes hold the captured delimiters (tags and comments)
		if ($i % 2)
		{
			$comment = (substr($chunk, 0, 4) == '<!--');
			if ($comment)
			{
				// Nothing to do, this is a comment.
				$output .= $chunk;
				continue;
			}
			// Opening or closing tag?
			$open = ($chunk[1] != '/');
			// The tag name starts at offset 1 for "<tag" and offset 2 for "</tag"
			list($tag) = preg_split('/[ >]/', substr($chunk, 2 - $open), 2);
			if (!$ignore)
			{
				if ($open)
				{
					$ignore = TRUE;
					$ignoretag = $tag;
				}
			}
			// Only allow a matching tag to close it.
			elseif (!$open && $ignoretag == $tag)
			{
				$ignore = FALSE;
				$ignoretag = '';
			}
		}
		// Even indexes hold literal text; only process it outside ignored tags
		elseif (!$ignore)
		{
			// just to make things a little easier, pad the end
			$chunk = preg_replace('|\n*$|', '', $chunk) . "\n\n"; 
			$chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
			$chunk = preg_replace('!(<' . $block . '[^>]*>)!', "\n$1", $chunk); // Space things out a little
			$chunk = preg_replace('!(</' . $block . '>)!', "$1\n\n", $chunk); // Space things out a little
			$chunk = preg_replace("/\n\n+/", "\n\n", $chunk); // take care of duplicates
			$chunk = preg_replace('/^\n|\n\s*\n$/', '', $chunk);
			// make paragraphs, including one at the end
			$chunk = '<p>' . preg_replace('/\n\s*\n\n?(.)/', "</p>\n<p>$1", $chunk) . "</p>\n"; 
			$chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk); // problem with nested lists
			$chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
			$chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
			// under certain strange conditions it could create a P of entirely whitespace
			$chunk = preg_replace('|<p>\s*</p>\n?|', '', $chunk); 
			$chunk = preg_replace('!<p>\s*(</?' . $block . '[^>]*>)!', "$1", $chunk);
			$chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*</p>!', "$1", $chunk);
			$chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk); // make line breaks
			$chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*<br />!', "$1", $chunk);
			$chunk = preg_replace('!<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);
			// Escape ampersands that do not start a named entity
			$chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
		}
		$output .= $chunk;
	}

	return $output;
}

public static convert_accented_characters( string $string [, string $replacement = string(1) "-" ] ) (defined in Gleez_Text)

Returns a lowercased string with runs of whitespace converted to the replacement string (a hyphen by default), accented characters converted to their non-accented equivalents, and non-word characters removed.

Parameters

  • string $string required - The string you want to slug
  • string $replacement = string(1) "-" - Will replace keys in map

Return Values

  • string

Source Code

/**
 * Returns a slug-style version of a string.
 *
 * The string is lowercased with strtolower(), known accented characters
 * are transliterated, every character that is not a letter, digit or
 * whitespace becomes a space, runs of whitespace collapse into
 * $replacement, and leading/trailing $replacement characters are removed.
 *
 * When $replacement is an array it is treated as a custom map of
 * `regex => replacement` pairs. Custom entries take precedence over the
 * built-in table, and whitespace is then replaced with an underscore.
 *
 * @param   string        $string       The string you want to slug
 * @param   string|array  $replacement  Whitespace replacement, or a custom regex map
 * @return  string
 */
public static function convert_accented_characters($string, $replacement = '-')
{
    // NOTE(review): strtolower() is byte-oriented, so multi-byte uppercase
    // letters are not lowercased here; the uppercase rows below handle them.
    $string = strtolower($string);

    $foreign_characters = array(
        '/ä|æ|ǽ/' => 'ae',
        '/ö|œ/' => 'oe',
        '/ü/' => 'ue',
        '/Ä/' => 'Ae',
        '/Ü/' => 'Ue',
        '/Ö/' => 'Oe',
        '/À|Á|Â|Ã|Ä|Å|Ǻ|Ā|Ă|Ą|Ǎ/' => 'A',
        '/à|á|â|ã|å|ǻ|ā|ă|ą|ǎ|ª/' => 'a',
        '/Ç|Ć|Ĉ|Ċ|Č/' => 'C',
        '/ç|ć|ĉ|ċ|č/' => 'c',
        '/Ð|Ď|Đ/' => 'D',
        '/ð|ď|đ/' => 'd',
        '/È|É|Ê|Ë|Ē|Ĕ|Ė|Ę|Ě/' => 'E',
        '/è|é|ê|ë|ē|ĕ|ė|ę|ě/' => 'e',
        '/Ĝ|Ğ|Ġ|Ģ/' => 'G',
        '/ĝ|ğ|ġ|ģ/' => 'g',
        '/Ĥ|Ħ/' => 'H',
        '/ĥ|ħ/' => 'h',
        '/Ì|Í|Î|Ï|Ĩ|Ī|Ĭ|Ǐ|Į|İ/' => 'I',
        '/ì|í|î|ï|ĩ|ī|ĭ|ǐ|į|ı/' => 'i',
        '/Ĵ/' => 'J',
        '/ĵ/' => 'j',
        '/Ķ/' => 'K',
        '/ķ/' => 'k',
        '/Ĺ|Ļ|Ľ|Ŀ|Ł/' => 'L',
        '/ĺ|ļ|ľ|ŀ|ł/' => 'l',
        '/Ñ|Ń|Ņ|Ň/' => 'N',
        '/ñ|ń|ņ|ň|ŉ/' => 'n',
        '/Ò|Ó|Ô|Õ|Ō|Ŏ|Ǒ|Ő|Ơ|Ø|Ǿ/' => 'O',
        '/ò|ó|ô|õ|ō|ŏ|ǒ|ő|ơ|ø|ǿ|º/' => 'o',
        '/Ŕ|Ŗ|Ř/' => 'R',
        '/ŕ|ŗ|ř/' => 'r',
        '/Ś|Ŝ|Ş|Š/' => 'S',
        '/ś|ŝ|ş|š|ſ/' => 's',
        '/Ţ|Ť|Ŧ/' => 'T',
        '/ţ|ť|ŧ/' => 't',
        '/Ù|Ú|Û|Ũ|Ū|Ŭ|Ů|Ű|Ų|Ư|Ǔ|Ǖ|Ǘ|Ǚ|Ǜ/' => 'U',
        '/ù|ú|û|ũ|ū|ŭ|ů|ű|ų|ư|ǔ|ǖ|ǘ|ǚ|ǜ/' => 'u',
        '/Ý|Ÿ|Ŷ/' => 'Y',
        '/ý|ÿ|ŷ/' => 'y',
        '/Ŵ/' => 'W',
        '/ŵ/' => 'w',
        '/Ź|Ż|Ž/' => 'Z',
        '/ź|ż|ž/' => 'z',
        '/Æ|Ǽ/' => 'AE',
        '/ß/' => 'ss',
        '/Ĳ/' => 'IJ',
        '/ĳ/' => 'ij',
        '/Œ/' => 'OE',
        '/ƒ/' => 'f'
    );

    // An array argument is a custom transliteration map, not a separator
    $custom = array();

    if (is_array($replacement))
    {
        $custom      = $replacement;
        $replacement = '_';
    }

    $quotedReplacement = preg_quote($replacement, '/');

    $merge = array(
        // Anything that is not whitespace, a letter or a decimal digit
        '/[^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}]/mu' => ' ',
        '/\\s+/' => $replacement,
        // Strip the separator from both ends
        sprintf('/^[%s]+|[%s]+$/', $quotedReplacement, $quotedReplacement) => ''
    );

    // Array union keeps the first occurrence of a key, so custom rules win
    // and run before the built-in table and the clean-up rules.
    $map = $custom + $foreign_characters + $merge;

    return preg_replace(array_keys($map), array_values($map), $string);
}

public static dom_load( string $text ) (defined in Gleez_Text)

Parses an HTML snippet and returns it as a DOM object.

This function loads the body part of a partial (X)HTML document and returns a full DOMDocument object that represents this document. You can use dom_serialize() to serialize this DOMDocument back to a XHTML snippet.

The partial (X)HTML snippet to load. Invalid mark-up will be corrected on import.

A DOMDocument that represents the loaded (X)HTML snippet.

Parameters

  • string $text required - Text string to filter html

Return Values

Source Code

/**
 * Parses an HTML snippet and returns it as a DOM object.
 *
 * The snippet is wrapped in a minimal XHTML document that declares a
 * UTF-8 content type before it is handed to DOMDocument::loadHTML().
 *
 * @param   string  $text  The partial (X)HTML snippet to load
 * @return  DOMDocument    Document whose body holds the parsed snippet
 */
static function dom_load($text)
{
	// loadHTML() is an instance method; calling it statically fails on PHP 8
	$dom_document = new DOMDocument();

	// Ignore warnings during HTML soup loading: collect them internally,
	// discard them, then restore the caller's libxml error mode.
	$previous = libxml_use_internal_errors(TRUE);

	$dom_document->loadHTML(
		'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
		. '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
		. '<html xmlns="http://www.w3.org/1999/xhtml"><head>'
		. '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'
		. '</head><body>' . $text . '</body></html>'
	);

	libxml_clear_errors();
	libxml_use_internal_errors($previous);

	return $dom_document;
}

public static emptyparagraph( $text $text ) (defined in Gleez_Text)

Empty paragraph killer: because users are sometimes overzealous with the return key. Multiple returns will not break the site's style.

When entering more than one carriage return, only the first will be honored.

The text to be checked or processed.

Parameters

  • $text $text required

Source Code

/**
 * Removes paragraphs that contain nothing but whitespace and/or
 * non-breaking space entities.
 *
 * @param   string  $text  The text to be processed
 * @return  string
 */
public static function emptyparagraph($text)
{
	// A <p> (with any attributes) wrapping only whitespace or &nbsp
	$empty_paragraph = '#<p[^>]*>(\s|&nbsp;?)*</p>#';

	return preg_replace($empty_paragraph, '', $text);
}

public static filters( ) (defined in Gleez_Text)

Core function to run all enabled filters by the format id on given string

Source Code

/**
 * Core function to run all enabled filters of a text format on a text object.
 *
 * Reads and writes the `format` and `text` members of $text. When the
 * format is missing or not present in the `inputfilter` config, the
 * configured default format is used. Every enabled filter's "prepare
 * callback" is run first, then every enabled filter's "process callback".
 *
 * @param   object  $text  Object exposing `text` and `format` properties
 * @return  string  The filtered text
 */
static function filters($text)
{
	$config = Kohana::$config->load('inputfilter');

	// Test for existence first: reading an unset format, or using it as an
	// array key, is exactly what this guard is meant to prevent.
	if ( ! isset($text->format) OR ! array_key_exists($text->format, (array) $config->get('formats')))
	{
		// make sure a valid format id exists, if not set default format id
		$text->format = (int) $config->get('default_format', 1);
	}

	$filters     = $config->formats[$text->format]['filters'];
	$filter_info = InputFilter::filters();

	// sort filters by weight
	$filters = Arr::array_sort($filters, 'weight');

	// First give filters the chance to escape HTML-like data such as code or
	// formulas ("prepare"), then perform the actual filtering ("process").
	foreach (array('prepare callback', 'process callback') as $stage)
	{
		foreach ($filters as $name => $filter)
		{
			if ($filter['status'] AND ! empty($filter_info[$name][$stage]))
			{
				$text->text = InputFilter::callback(
					$filter_info[$name][$stage],
					$text->text,
					$text->format,
					$filter
				);
			}
		}
	}

	return $text->text;
}

public static fractions( string $text ) (defined in Gleez_Text)

Converts fractions to their html equivalent (for example, 1/4 will become ¼)

Parameters

  • string $text required - String to be processed

Tags

  • Static -
  • See - http://drupal.org/project/more_filters

Return Values

  • string

Source Code

/**
 * Converts fractions to their HTML equivalent (for example, 1/4 will
 * become &frac14;).
 *
 * Each fraction is handed to self::_replace_fraction() in the order
 * listed below.
 *
 * @param   string  $text  String to be processed
 * @return  string
 */
public static function fractions($text)
{
	// Plain-text fraction => HTML entity
	$entities = array(
		'1/4' => '&frac14;',
		'3/4' => '&frac34;',
		'1/2' => '&frac12;',
		'1/3' => '&#8531;',
		'2/3' => '&#8532;',
		'1/8' => '&#8539;',
		'3/8' => '&#8540;',
		'5/8' => '&#8541;',
		'7/8' => '&#8542;',
	);

	foreach ($entities as $fraction => $entity)
	{
		$text = self::_replace_fraction($fraction, $entity, $text);
	}

	return $text;
}

public static get_urls( string $html [, boolean $unique = bool FALSE ] ) (defined in Gleez_Text)

Extract link URLs from HTML content.

Parameters

  • string $html required - The HTML
  • boolean $unique = bool FALSE - Remove duplicate URLs?

Return Values

  • array

Source Code

/**
 * Extract link URLs from HTML content.
 *
 * Only anchors whose href is wrapped in single or double quotes and whose
 * content contains no nested tags are matched. Slashes are stripped from
 * the input with stripslashes() before matching.
 *
 * @param   string   $html    The HTML
 * @param   boolean  $unique  Remove duplicate URLs?
 * @return  array    List of href values in document order
 */
public static function get_urls($html, $unique = FALSE)
{
	// Inside a character class "|" is a literal pipe, not alternation, so
	// the quote classes list only the two quote characters.
	$regexp = '/<a[^>]+href\s*=\s*["\']([^\s"\']+)["\'][^>]*>[^<]*<\/a>/i';

	preg_match_all($regexp, stripslashes($html), $matches);
	$urls = $matches[1];

	if ($unique)
	{
		// Re-index so the result stays a plain list
		$urls = array_values(array_unique($urls));
	}

	return $urls;
}

public static highlight( ) (defined in Gleez_Text)

Source Code

/**
 * Wraps occurrences of the given keywords in highlight spans.
 *
 * Each individual keyword is wrapped (case-insensitively) in a
 * "highlight-partial" span. Wherever all wrapped keywords then appear in
 * sequence, separated by single spaces, that sequence is replaced by one
 * "highlight" span containing the whole phrase.
 *
 * @param   string  $str       Text to search in
 * @param   string  $keywords  Space separated keywords
 * @return  string
 */
public static function highlight($str, $keywords)
{
	// Trim, strip tags, and collapse runs of whitespace to a single space
	$phrase = strip_tags(trim($keywords));
	$phrase = preg_replace('/\s\s+/', ' ', $phrase);

	// Partial matches: wrap every keyword on its own
	$wrapped = array();

	foreach (explode(' ', $phrase) as $word)
	{
		$span      = '<span class="highlight-partial">'.$word.'</span>';
		$wrapped[] = $span;

		$str = str_ireplace($word, $span, $str);
	}

	// Full match: the wrapped keywords next to each other in order
	$sequence = implode(' ', $wrapped);

	return str_ireplace($sequence, '<span class="highlight">'.$phrase.'</span>', $str);
}

public static html( ) (defined in Gleez_Text)

HTML filter. Provides filtering of input into accepted HTML.

Source Code

/**
 * HTML filter. Provides filtering of input into accepted HTML.
 *
 * The text is rendered through InputFilter::factory(). When the
 * `html_nofollow` setting is truthy, every anchor additionally receives
 * rel="nofollow" and, if `url_length` is truthy, its node value is
 * shortened to that many characters with Text::limit_chars().
 *
 * @param   string  $text    Text to filter
 * @param   mixed   $format  Passed through to InputFilter::factory()
 * @param   array   $filter  Filter definition; reads $filter['settings']
 * @return  string  Trimmed, filtered text
 */
static function html($text, $format, $filter) {

	$text = (string) InputFilter::factory($text, $format, $filter)->render();

	// Without the nofollow setting there is no DOM work to do
	if ( ! $filter['settings']['html_nofollow'])
	{
		return trim($text);
	}

	$document = self::dom_load($text);

	foreach ($document->getElementsByTagName('a') as $anchor)
	{
		$anchor->setAttribute('rel', 'nofollow');

		// Shortens long URLs to http://www.example.com/long/url...
		if ($filter['settings']['url_length'])
		{
			$max_length        = (int) $filter['settings']['url_length'];
			$anchor->nodeValue = Text::limit_chars($anchor->nodeValue, $max_length, '....');
		}
	}

	return trim(self::dom_serialize($document));
}

public static htmlcorrector( string $text ) (defined in Gleez_Text)

Scan input and make sure that all HTML tags are properly closed and nested.

Parameters

  • string $text required - Text string to filter html

Source Code

/**
 * Round-trips the text through Text::dom_load() and Text::dom_serialize()
 * so that the parser can close and nest the HTML tags properly.
 *
 * @param   string  $text  Text string to filter html
 * @return  mixed   Whatever Text::dom_serialize() returns (presumably a string)
 */
public static function htmlcorrector($text)
{
	$document = Text::dom_load($text);

	return Text::dom_serialize($document);
}

public static initialcaps( string $text ) (defined in Gleez_Text)

Adds a <span class="initial"> tag around the initial letter of each paragraph

Parameters

  • string $text required - String to be processed

Tags

  • Static -
  • See - http://drupal.org/project/more_filters

Return Values

  • string

Source Code

/**
 * Adds a <span class="initial"> tag around the initial letter of each
 * paragraph.
 *
 * The character must directly follow an opening <p> tag (leading
 * whitespace is ignored), must be a letter or digit, and must itself be
 * followed by a letter, an apostrophe or whitespace.
 *
 * @param   string  $text  String to be processed
 * @return  string
 */
public static function initialcaps($text)
{
	$pattern     = '/(<p[^>]*>\s*)([A-Z0-9])([A-Z\'\s]{1})/i';
	$replacement = '$1<span class="initial">$2</span>$3';

	return preg_replace($pattern, $replacement, $text);
}

public static markup( $text $text [, $format_id $format_id = bool FALSE , $langcode $langcode = bool FALSE , $cache $cache = bool FALSE ] ) (defined in Gleez_Text)

Run all the enabled filters on a piece of text.

Note: Because filters can inject JavaScript or execute PHP code, security is vital here. When a user supplies a text format, you should validate it using filter_access() before accepting/using it. This is normally done in the validation stage of the Form API. You should for example never make a preview of content in a disallowed format.

The text to be filtered. The format id of the text to be filtered. If no format is assigned, the fallback format will be used. Optional: the language code of the text to be filtered, e.g. 'en' for English. This allows filters to be language aware so language specific text replacement can be implemented. Boolean whether to cache the filtered output in the {cache_filter} table. The caller may set this to FALSE when the output is already cached elsewhere to avoid duplicate cache lookups and storage.

Parameters

  • $text $text required
  • $format_id $format_id = bool FALSE
  • $langcode $langcode = bool FALSE
  • $cache $cache = bool FALSE

Source Code

/**
 * Run all the enabled filters on a piece of text.
 *
 * @param   string   $text       The text to be filtered
 * @param   mixed    $format_id  Format id; FALSE or NULL selects the configured default format
 * @param   mixed    $langcode   Language code; FALSE or NULL selects I18n::$lang
 * @param   boolean  $cache      Whether to read/write the result in the 'cache_filter' cache
 * @return  string   The filtered text
 */
public static function markup($text, $format_id = FALSE, $langcode = FALSE, $cache = FALSE)
{
	$config = Kohana::$config->load('inputfilter');

	// The parameters default to FALSE, for which isset() is always TRUE, so
	// "not supplied" has to be tested explicitly for the fallbacks to apply.
	$format_id = ($format_id === FALSE OR $format_id === NULL)
		? (int) $config->get('default_format', 1)
		: (int) $format_id;
	$langcode = ($langcode === FALSE OR $langcode === NULL) ? I18n::$lang : $langcode;

	// Check for a cached version of this piece of text.
	$cache_id = $format_id . ':' . $langcode . ':' . hash('sha256', $text);
	if ($cache AND $cached = Cache::instance('cache_filter')->get($cache_id))
	{
		return $cached;
	}

	// Convert all Windows and Mac newlines to a single newline, so filters
	// only need to deal with one possibility.
	$text = str_replace(array("\r\n", "\r"), "\n", $text);
	$text = str_replace('<!--break-->', '', $text);

	// ARRAY_AS_PROPS lets listeners and filters() use ->text, ->format, ...
	$textObj = new ArrayObject(array(
		'text'     => (string) $text,
		'format'   => (int)    $format_id,
		'langcode' => (string) $langcode,
		'cache'    => (bool)   $cache,
		'cache_id' => (string) $cache_id
	), ArrayObject::ARRAY_AS_PROPS);

	Module::event('inputfilter', $textObj);

	$text = self::filters($textObj); //run all filters

	// Store in cache with a minimum expiration time of 1 day.
	// NOTE(review): argument order follows the original call; confirm it
	// against the Cache driver's set() signature.
	if ($cache)
	{
		Cache::instance('cache_filter')->set($cache_id, $text, null, time() + (60 * 60 * 24));
	}

	return $text;
}

Source Code

/**
 * Replaces every anchor in the text via self::_links_list() and appends
 * the collected link list to the end of the text.
 *
 * self::$_link_list and self::$_link_count are reset afterwards, so each
 * call starts from a clean state.
 *
 * @param   string   $text        Text containing HTML anchors
 * @param   boolean  $auto_links  Run Text::auto_link() on the text first?
 * @return  string
 */
public static function move_links_to_end($text, $auto_links = FALSE)
{
	$search = '/<a [^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/i';

	if ($auto_links)
	{
		$text = Text::auto_link($text);
	}

	// The /e (eval) modifier was removed in PHP 7. A callback passes the
	// captured href and link text on unescaped.
	// NOTE(review): _links_list() presumably returns the inline replacement
	// and accumulates self::$_link_list - confirm against its definition.
	$text = preg_replace_callback($search, function ($matches) {
		return self::_links_list($matches[1], $matches[2]);
	}, $text);

	// Add link list
	if ( ! empty(self::$_link_list))
	{
		$text .= __("\n\nLinks:\n") . self::$_link_list;
	}

	// reset these vars to defaults
	self::$_link_list  = '';
	self::$_link_count = 0;

	return $text;
}

public static normalize_spaces( string $string ) (defined in Gleez_Text)

Replace runs of multiple whitespace characters with a single space.

Parameters

  • string $string required - The string to normalize

Return Values

  • string

Source Code

/**
 * Replace runs of whitespace characters with a single space and trim the
 * result with UTF8::trim().
 *
 * Values that empty() considers empty are returned untouched.
 *
 * @param   string  $string  The string to normalize
 * @return  string
 */
public static function normalize_spaces($string)
{
	if (empty($string))
	{
		return $string;
	}

	$collapsed = preg_replace('/[\s\n\r\t]+/', ' ', $string);

	return UTF8::trim($collapsed);
}

public static ordinals( string $text ) (defined in Gleez_Text)

Adds <span class="ordinal"> tags around any ordinals (nd / st / th / rd)

Parameters

  • string $text required - String to be processed

Tags

  • Static -
  • See - http://drupal.org/project/more_filters

Return Values

  • string

Source Code

/**
 * Adds <span class="ordinal"> tags around any ordinals (nd / st / th / rd).
 *
 * One or more digits must precede the suffix. The suffix is ignored when
 * it is immediately followed by a letter or a digit.
 *
 * @param   string  $text  String to be processed
 * @return  string
 */
public static function ordinals($text)
{
	// A negative lookahead instead of a consumed character also matches an
	// ordinal that is the very last thing in the text.
	return preg_replace(
		'/([0-9]+)(nd|st|th|rd)(?![a-zA-Z0-9])/',
		'$1<span class="ordinal">$2</span>',
		$text
	);
}

public static plain( $text $text ) (defined in Gleez_Text)

Encode special characters in a plain-text string for display as HTML.

Also validates strings as UTF-8 to prevent cross site scripting attacks on Internet Explorer 6.

The text to be checked or processed.

An HTML safe version of $text, or an empty string if $text is not valid UTF-8.

Parameters

  • $text $text required

Return Values

Source Code

/**
 * Encode special characters in a plain-text string for display as HTML.
 *
 * Thin wrapper around HTML::chars().
 *
 * @param   string  $text  The text to be checked or processed
 * @return  string  Result of HTML::chars()
 */
public static function plain($text)
{
	$escaped = HTML::chars($text);

	return $escaped;
}

public static standardize( string $value ) (defined in Gleez_Text)

Standardize newlines

Parameters

  • string $value required - The value

Return Values

  • string

Source Code

/**
 * Standardize newlines: "\r\n" and lone "\r" both become "\n".
 *
 * @param   string  $value  The value
 * @return  string
 */
public static function standardize($value)
{
	// No carriage return, nothing to convert
	if (strpos($value, "\r") === FALSE)
	{
		return $value;
	}

	return str_replace(array("\r\n", "\r"), "\n", $value);
}

public static alternate( ) (defined in Kohana_Text)

Alternates between two or more strings.

echo Text::alternate('one', 'two'); // "one"
echo Text::alternate('one', 'two'); // "two"
echo Text::alternate('one', 'two'); // "one"

Note that using multiple iterations of different strings may produce unexpected results.

Return Values

  • string

Source Code

/**
 * Alternates between two or more strings.
 *
 * A call counter is kept in a static variable shared by all callers.
 * Calling the method without arguments resets the counter and returns an
 * empty string.
 *
 * @param   string  $str,...  Strings to alternate between
 * @return  string
 */
public static function alternate()
{
	static $i;

	$choices = func_get_args();

	if (count($choices) === 0)
	{
		// Reset request
		$i = 0;
		return '';
	}

	// Pick by the current counter value, then advance it
	$index = $i % count($choices);
	$i++;

	return $choices[$index];
}

Converts text email addresses and anchors into links. Existing links will not be altered.

echo Text::auto_link($text);

This method is not foolproof since it uses regex to parse HTML.

Parameters

  • string $text required - Text to auto link

Tags

Return Values

  • string

Source Code

/**
 * Converts text email addresses and anchors into links.
 *
 * Emails are linked first to prevent problems with
 * "www.domain.com@example.com"; URLs are linked afterwards.
 *
 * @param   string  $text  Text to auto link
 * @return  string
 */
public static function auto_link($text)
{
	$with_emails = Text::auto_link_emails($text);

	return Text::auto_link_urls($with_emails);
}

Converts text email addresses into links. Existing links will not be altered.

echo Text::auto_link_emails($text);

This method is not foolproof since it uses regex to parse HTML.

Parameters

  • string $text required - Text to auto link

Tags

Return Values

  • string

Source Code

/**
 * Converts text email addresses into links by passing every match to
 * Text::_auto_link_emails_callback().
 *
 * @param   string  $text  Text to auto link
 * @return  string
 */
public static function auto_link_emails($text)
{
	// Matches email addresses that are not part of an existing html mailto
	// anchor. The "58;" negative lookbehind skips encoded mailto anchors:
	// the html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
	$email_pattern = '~\b(?<!href="mailto:|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b(?!</a>)~i';

	return preg_replace_callback($email_pattern, 'Text::_auto_link_emails_callback', $text);
}

Converts text anchors into links. Existing links will not be altered.

echo Text::auto_link_urls($text);

This method is not foolproof since it uses regex to parse HTML.

Parameters

  • string $text required - Text to auto link

Tags

Return Values

  • string

Source Code

/**
 * Converts text URLs into links in two passes: first URLs with an
 * http/https/ftp/ftps scheme, then naked "www." addresses.
 *
 * @param   string  $text  Text to auto link
 * @return  string
 */
public static function auto_link_urls($text)
{
	// http/https/ftp/ftps links that are not part of an existing html anchor
	$scheme_pattern = '~\b(?<!href="|">)(?:ht|f)tps?://[^<\s]+(?:/|\b)~i';

	// Naked www.links.com (without http://)
	$www_pattern = '~\b(?<!://|">)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}[^<\s]*\b~i';

	$text = preg_replace_callback($scheme_pattern, 'Text::_auto_link_urls_callback1', $text);

	return preg_replace_callback($www_pattern, 'Text::_auto_link_urls_callback2', $text);
}

public static auto_p( string $str [, boolean $br = bool TRUE ] ) (defined in Kohana_Text)

Automatically applies "p" and "br" markup to text. Basically nl2br on steroids.

echo Text::auto_p($text);

This method is not foolproof since it uses regex to parse HTML.

Parameters

  • string $str required - Subject
  • boolean $br = bool TRUE - Convert single linebreaks to <br />

Return Values

  • string

Source Code

/**
 * Automatically applies "p" and "br" markup to text.
 *
 * Blank-line separated blocks become paragraphs. Block-level elements
 * listed in $no_p are kept out of paragraph tags. Single line breaks
 * become "<br />" when $br is TRUE.
 *
 * @param   string   $str  Subject
 * @param   boolean  $br   Convert single linebreaks to <br />
 * @return  string
 */
public static function auto_p($str, $br = TRUE)
{
	$str = trim($str);

	// Nothing but whitespace
	if ($str === '')
	{
		return '';
	}

	// Standardize newlines
	$str = str_replace(array("\r\n", "\r"), "\n", $str);

	// Trim whitespace on each line
	$str = preg_replace('~^[ \t]+~m', '', $str);
	$str = preg_replace('~[ \t]+$~m', '', $str);

	// The html-specific regexes are skipped for tag-free text
	$contains_html = (strpos($str, '<') !== FALSE);

	if ($contains_html)
	{
		// Elements that should not be surrounded by p tags
		$no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

		// Put at least two linebreaks before and after $no_p elements
		$str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
		$str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
	}

	// Wrap everything, then split into paragraphs at blank lines
	$str = '<p>'.trim($str).'</p>';
	$str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

	if ($contains_html)
	{
		// Remove p tags around $no_p elements
		$str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
		$str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
	}

	if ($br === TRUE)
	{
		// Convert single linebreaks to <br />
		$str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);
	}

	return $str;
}

public static bytes( integer $bytes [, string $force_unit = NULL , string $format = NULL , boolean $si = bool TRUE ] ) (defined in Kohana_Text)

Returns human readable sizes. Based on original functions written by Aidan Lister and Quentin Zervaas.

echo Text::bytes(filesize($file));

Parameters

  • integer $bytes required - Size in bytes
  • string $force_unit = NULL - A definitive unit
  • string $format = NULL - The return string format
  • boolean $si = bool TRUE - Whether to use SI prefixes or IEC

Return Values

  • string

Source Code

/**
 * Returns human readable sizes.
 *
 * @param   integer  $bytes       Size in bytes
 * @param   string   $force_unit  A definitive unit (e.g. "MB" or "MiB"); a unit containing "i" selects IEC
 * @param   string   $format      The return string format for sprintf(); default '%01.2f %s'
 * @param   boolean  $si          Whether to use SI prefixes (TRUE) or IEC (FALSE)
 * @return  string
 */
public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
{
	// Format string
	$format = ($format === NULL) ? '%01.2f %s' : (string) $format;

	// Normalise once so that strpos() and array_search() never receive NULL
	$force_unit = (string) $force_unit;

	// IEC prefixes (binary)
	if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
	{
		$units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
		$mod   = 1024;
	}
	// SI prefixes (decimal)
	else
	{
		$units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
		$mod   = 1000;
	}

	// Determine unit to use
	$power = array_search($force_unit, $units, TRUE);

	if ($power === FALSE)
	{
		$power = ($bytes > 0) ? (int) floor(log($bytes, $mod)) : 0;

		// Stay inside the unit table: sizes beyond the largest unit are
		// expressed in that unit, sizes below one byte in bytes.
		$power = max(0, min($power, count($units) - 1));
	}

	return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
}

public static censor( string $str , array $badwords [, string $replacement = string(1) "#" , boolean $replace_partial_words = bool TRUE ] ) (defined in Kohana_Text)

Replaces the given words with a string.

// Displays "What the #####, man!"
echo Text::censor('What the frick, man!', array(
    'frick' => '#####',
));

Parameters

  • string $str required - Phrase to replace words in
  • array $badwords required - Words to replace
  • string $replacement = string(1) "#" - Replacement string
  • boolean $replace_partial_words = bool TRUE - Replace words across word boundaries (space, period, etc)

Tags

Return Values

  • string

Source Code

/**
 * Replaces the given words with a string.
 *
 * A "*" inside a bad word acts as a wildcard for any run of non-whitespace
 * characters. When $replacement is exactly one character long it is
 * repeated to the length of the matched word; otherwise every match is
 * replaced by $replacement as a whole.
 *
 * @param   string        $str                    Phrase to replace words in
 * @param   array|string  $badwords               Words to replace
 * @param   string        $replacement            Replacement string
 * @param   boolean       $replace_partial_words  Replace words across word boundaries (space, period, etc)
 * @return  string
 */
public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = TRUE)
{
	// Build the alternatives in a fresh array: writing back into $badwords
	// corrupts the input when a single word is passed as a string.
	$patterns = array();

	foreach ( (array) $badwords as $badword)
	{
		$patterns[] = str_replace('\*', '\S*?', preg_quote( (string) $badword));
	}

	// An empty alternation would match at every position in the text
	if (empty($patterns))
	{
		return $str;
	}

	$regex = '('.implode('|', $patterns).')';

	if ($replace_partial_words === FALSE)
	{
		// Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
		$regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
	}

	$regex = '!'.$regex.'!ui';

	if (UTF8::strlen($replacement) == 1)
	{
		// The /e (eval) modifier was removed in PHP 7; use a callback to
		// repeat the replacement character once per matched character.
		return preg_replace_callback($regex, function ($matches) use ($replacement) {
			return str_repeat($replacement, UTF8::strlen($matches[1]));
		}, $str);
	}

	return preg_replace($regex, $replacement, $str);
}

public static limit_chars( string $str [, integer $limit = integer 100 , string $end_char = NULL , boolean $preserve_words = bool FALSE ] ) (defined in Kohana_Text)

Limits a phrase to a given number of characters.

$text = Text::limit_chars($text);

Parameters

  • string $str required - Phrase to limit characters of
  • integer $limit = integer 100 - Number of characters to limit to
  • string $end_char = NULL - End character or entity
  • boolean $preserve_words = bool FALSE - Enable or disable the preservation of words while limiting

Tags

Return Values

  • string

Source Code

/**
 * Limits a phrase to a given number of characters.
 *
 * @param   string   $str             Phrase to limit characters of
 * @param   integer  $limit           Number of characters to limit to
 * @param   string   $end_char        End character or entity; defaults to '…'
 * @param   boolean  $preserve_words  Enable or disable the preservation of words while limiting
 * @return  string
 */
public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
{
	if ($end_char === NULL)
	{
		$end_char = '…';
	}

	$limit = (int) $limit;

	// Blank or already short enough: return untouched
	if (trim($str) === '' OR UTF8::strlen($str) <= $limit)
	{
		return $str;
	}

	if ($limit <= 0)
	{
		return $end_char;
	}

	if ($preserve_words === FALSE)
	{
		// Hard cut at the character limit
		$cut = UTF8::substr($str, 0, $limit);

		return rtrim($cut).$end_char;
	}

	// Preserve words. The limit is considered the top limit: cut at the
	// last whitespace that follows at most $limit characters.
	if ( ! preg_match('/^.{0,'.$limit.'}\s/us', $str, $matches))
	{
		return $end_char;
	}

	$head   = $matches[0];
	$suffix = (strlen($head) === strlen($str)) ? '' : $end_char;

	return rtrim($head).$suffix;
}

public static limit_words( string $str [, integer $limit = integer 100 , string $end_char = NULL ] ) (defined in Kohana_Text)

Limits a phrase to a given number of words.

$text = Text::limit_words($text);

Parameters

  • string $str required - Phrase to limit words of
  • integer $limit = integer 100 - Number of words to limit to
  • string $end_char = NULL - End character or entity

Return Values

  • string

Source Code

/**
 * Limits a phrase to a given number of words.
 *
 * @param   string   $str       Phrase to limit words of
 * @param   integer  $limit     Number of words to limit to
 * @param   string   $end_char  End character or entity; defaults to '…'
 * @return  string
 */
public static function limit_words($str, $limit = 100, $end_char = NULL)
{
	$limit = (int) $limit;

	if ($end_char === NULL)
	{
		$end_char = '…';
	}

	// Blank input is handed back as-is
	if (trim($str) === '')
	{
		return $str;
	}

	if ($limit <= 0)
	{
		return $end_char;
	}

	preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches);

	$head = $matches[0];

	// The end character is only attached when something was cut off
	$suffix = (strlen($head) === strlen($str)) ? '' : $end_char;

	return rtrim($head).$suffix;
}

public static number( integer $number ) (defined in Kohana_Text)

Format a number to human-readable text.

// Display: one thousand and twenty-four
echo Text::number(1024);

// Display: five million, six hundred and thirty-two
echo Text::number(5000632);

Parameters

  • integer $number required - Number to format

Tags

  • Since - 3.0.8

Return Values

  • string

Source Code

/**
 * Format a number to human-readable text.
 *
 * Walks Text::$units in its stored order and peels off each unit that fits
 * into what is left of the number. Units of 100 and above are rendered as
 * "<count as text> <unit name>" through a recursive call. A unit below 100
 * that follows a tens unit (20-90) is hyphenated onto it ("twenty-three").
 * The collected items are joined with ", ", except that the last one is
 * attached with " and ".
 *
 * No unit fits zero or a negative number, so the result is then an empty
 * string.
 *
 * NOTE(review): this assumes Text::$units is ordered from the largest unit
 * to the smallest - confirm against the property definition.
 *
 * @param   integer  $number  Number to format
 * @return  string
 */
public static function number($number)
{
	// The number must always be an integer
	$number = (int) $number;

	// Uncompiled text version
	$text = array();

	// Last matched unit within the loop
	$last_unit = NULL;

	// The last matched item within the loop
	$last_item = '';

	foreach (Text::$units as $unit => $name)
	{
		if ($number / $unit >= 1)
		{
			// $value = the number of times the number is divisble by unit
			$number -= $unit * ($value = (int) floor($number / $unit));
			// Temporary var for textifying the current unit
			$item = '';

			if ($unit < 100)
			{
				// A tens unit was matched last: hyphenate onto it
				if ($last_unit < 100 AND $last_unit >= 20)
				{
					$last_item .= '-'.$name;
				}
				else
				{
					$item = $name;
				}
			}
			else
			{
				$item = Text::number($value).' '.$name;
			}

			// In the situation that we need to make a composite number (i.e. twenty-three)
			// then we need to modify the previous entry
			if (empty($item))
			{
				array_pop($text);

				$item = $last_item;
			}

			$last_item = $text[] = $item;
			$last_unit = $unit;
		}
	}

	// With more than one item, the last one is joined with "and"
	if (count($text) > 1)
	{
		$and = array_pop($text);
	}

	$text = implode(', ', $text);

	if (isset($and))
	{
		$text .= ' and '.$and;
	}

	return $text;
}

public static random( [ string $type = NULL , integer $length = integer 8 ] ) (defined in Kohana_Text)

Generates a random string of a given type and length.

$str = Text::random(); // 8 character random string

The following types are supported:

alnum
Upper and lower case a-z, 0-9 (default)
alpha
Upper and lower case a-z
hexdec
Hexadecimal characters a-f, 0-9
distinct
Uppercase characters and numbers that cannot be confused

You can also create a custom type by providing the "pool" of characters as the type.

Parameters

  • string $type = NULL - A type of pool, or a string of characters to use as the pool
  • integer $length = integer 8 - Length of string to return

Tags

Return Values

  • string

Source Code

/**
 * Builds a random string of $length characters drawn from a character pool.
 *
 *     $str = Text::random(); // 8 character random string
 *
 * Named pools: alnum (default), alpha, hexdec, numeric, nozero, distinct.
 * Any other value is cast to a string and used as the pool itself.
 *
 * Characters are picked with mt_rand(), which is not a cryptographically
 * secure generator.
 *
 * @param   string   $type    A type of pool, or a string of characters to use as the pool
 * @param   integer  $length  Length of string to return
 * @return  string
 */
public static function random($type = NULL, $length = 8)
{
	// No type given: use the alphanumeric pool
	if ($type === NULL)
	{
		$type = 'alnum';
	}

	// Only a custom pool can contain multi-byte characters
	$multibyte = FALSE;

	// Loose comparison is intentional: it mirrors switch() semantics
	if ($type == 'alnum')
	{
		$pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
	}
	elseif ($type == 'alpha')
	{
		$pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
	}
	elseif ($type == 'hexdec')
	{
		$pool = '0123456789abcdef';
	}
	elseif ($type == 'numeric')
	{
		$pool = '0123456789';
	}
	elseif ($type == 'nozero')
	{
		$pool = '123456789';
	}
	elseif ($type == 'distinct')
	{
		$pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
	}
	else
	{
		// Caller supplied the pool directly
		$pool      = (string) $type;
		$multibyte = ! UTF8::is_ascii($pool);
	}

	// Turn the pool into a list of single characters
	if ($multibyte === TRUE)
	{
		$chars = UTF8::str_split($pool, 1);
	}
	else
	{
		$chars = str_split($pool, 1);
	}

	// Highest valid index into the character list
	$last = count($chars) - 1;

	// Append one randomly chosen character per iteration
	$str = '';
	$i   = 0;
	while ($i < $length)
	{
		$str .= $chars[mt_rand(0, $last)];
		$i++;
	}

	// Only multi-character alnum strings get the mixed-content guarantee
	if ($type !== 'alnum' OR ! ($length > 1))
	{
		return $str;
	}

	if (ctype_alpha($str))
	{
		// All letters: overwrite a random position with a digit (ASCII 48-57)
		$str[mt_rand(0, $length - 1)] = chr(mt_rand(48, 57));
	}
	elseif (ctype_digit($str))
	{
		// All digits: overwrite a random position with an uppercase letter (ASCII 65-90)
		$str[mt_rand(0, $length - 1)] = chr(mt_rand(65, 90));
	}

	return $str;
}

public static reduce_slashes( string $str ) (defined in Kohana_Text)

Reduces multiple slashes in a string to single slashes.

$str = Text::reduce_slashes('foo//bar/baz'); // "foo/bar/baz"

Parameters

  • string $str required - String to reduce slashes of

Return Values

  • string

Source Code

/**
 * Collapses every run of two or more slashes into a single slash.
 *
 *     $str = Text::reduce_slashes('foo//bar/baz'); // "foo/bar/baz"
 *
 * A run that directly follows a colon is left alone, so the "//" in
 * "scheme://" survives.
 *
 * @param   string  $str  String to reduce slashes of
 * @return  string
 */
public static function reduce_slashes($str)
{
	// Two or more slashes, not immediately preceded by ":"
	$repeated_slashes = '#(?<!:)//+#';

	$reduced = preg_replace($repeated_slashes, '/', $str);

	return $reduced;
}

public static similar( array $words ) (defined in Kohana_Text)

Finds the text that is similar between a set of words.

$match = Text::similar(array('fred', 'fran', 'free')); // "fr"

Parameters

  • array $words required - Words to find similar text of

Return Values

  • string

Source Code

/**
 * Returns the leading text shared by every word in the list.
 *
 *     $match = Text::similar(array('fred', 'fran', 'free')); // "fr"
 *
 * The comparison is done byte by byte against the current element of
 * $words (the first one for a freshly built array).
 *
 * @param   array  $words  Words to find similar text of
 * @return  string
 */
public static function similar(array $words)
{
	// Every word is compared against this reference word
	$reference = current($words);
	$length    = strlen($reference);

	// Number of leading bytes known to be common to all words
	$matched = 0;

	while ($matched < $length)
	{
		foreach ($words as $candidate)
		{
			// Stop at the first word that is too short or differs at this position
			if ( ! isset($candidate[$matched]) OR $candidate[$matched] !== $reference[$matched])
			{
				break 2;
			}
		}

		++$matched;
	}

	// The common prefix
	return substr($reference, 0, $matched);
}

public static ucfirst( string $string [, string $delimiter = string(1) "-" ] ) (defined in Kohana_Text)

Uppercase words that are not separated by spaces, using a custom delimiter or the default.

 $str = Text::ucfirst('content-type'); // returns "Content-Type"

Parameters

  • string $string required - String to transform
  • string $delimiter = string(1) "-" - Delimiter to use

Return Values

  • string

Source Code

/**
 * Uppercases the first character of every delimiter-separated segment.
 *
 *     $str = Text::ucfirst('content-type'); // returns "Content-Type"
 *
 * @param   string  $string     String to transform
 * @param   string  $delimiter  Delimiter to use
 * @return  string
 */
public static function ucfirst($string, $delimiter = '-')
{
	// Break the string apart on the delimiter
	$segments = explode($delimiter, $string);

	// Capitalise each segment with PHP's native ucfirst()
	$capitalised = array_map('ucfirst', $segments);

	// Glue the segments back together with the same delimiter
	return implode($delimiter, $capitalised);
}

public static widont( string $str ) (defined in Kohana_Text)

Prevents widow words by inserting a non-breaking space between the last two words.

echo Text::widont($text);

Parameters

  • string $str required - Text to remove widows from

Return Values

  • string

Source Code

/**
 * Prevents a widowed last word by replacing the final space in the text
 * with a non-breaking space entity.
 *
 *     echo Text::widont($text);
 *
 * Trailing whitespace is trimmed first. Text without any space is returned
 * trimmed but otherwise untouched.
 *
 * @param   string  $str  Text to remove widows from
 * @return  string
 */
public static function widont($str)
{
	$trimmed = rtrim($str);

	// Position of the space separating the last two words
	$last_space = strrpos($trimmed, ' ');

	if ($last_space === FALSE)
	{
		// Single word: nothing to glue together
		return $trimmed;
	}

	// Swap that one space for the entity
	return substr_replace($trimmed, '&nbsp;', $last_space, 1);
}

Source Code

/**
 * Regex callback: turns a matched email address into a mailto link.
 *
 * @param   array  $matches  Match data; element 0 is the full match
 * @return  string  Whatever HTML::mailto() produces for the address
 */
protected static function _auto_link_emails_callback($matches)
{
	$email = $matches[0];

	return HTML::mailto($email);
}

Source Code

/**
 * Regex callback: wraps a matched URL in an anchor.
 *
 * @param   array  $matches  Match data; element 0 is the full match
 * @return  string  Whatever HTML::anchor() produces for the URL
 */
protected static function _auto_link_urls_callback1($matches)
{
	$url = $matches[0];

	return HTML::anchor($url);
}

Source Code

/**
 * Regex callback: wraps a matched scheme-less URL in an anchor, prefixing
 * "http://" to the link target while keeping the matched text as the title.
 *
 * @param   array  $matches  Match data; element 0 is the full match
 * @return  string  Whatever HTML::anchor() produces
 */
protected static function _auto_link_urls_callback2($matches)
{
	$title  = $matches[0];
	$target = 'http://'.$title;

	return HTML::anchor($target, $title);
}

private static _escape_cdata_element( $dom_document $dom_document , $dom_element $dom_element [, $comment_start $comment_start = string(2) "//" , $comment_end $comment_end = string(0) "" ] ) (defined in Gleez_Text)

Adds comments around the <![CDATA[ section in a DOM element.

DOMDocument::loadHTML in filter_dom_load() makes CDATA sections from the contents of inline script and style tags. This can cause HTML 4 browsers to throw exceptions.

This function attempts to solve the problem by creating a DocumentFragment, commenting the CDATA tag.

The DOMDocument containing the $dom_element. The element potentially containing a CDATA node. String to use as a comment start marker to escape the CDATA declaration. String to use as a comment end marker to escape the CDATA declaration.

Parameters

  • $dom_document $dom_document required
  • $dom_element $dom_element required
  • $comment_start $comment_start = string(2) "//"
  • $comment_end $comment_end = string(0) ""

Source Code

/**
 * Wraps every CDATA child of a DOM element in comment markers.
 *
 * Each DOMCdataSection child is replaced by a document fragment in which
 * the CDATA opening and closing markers are hidden behind
 * $comment_start / $comment_end. The new fragment is appended to the end
 * of the element and the original CDATA node is removed.
 *
 * @param   DOMDocument  $dom_document   Document that owns $dom_element
 * @param   DOMElement   $dom_element    Element potentially containing CDATA nodes
 * @param   string       $comment_start  Comment start marker used to escape the CDATA declaration
 * @param   string       $comment_end    Comment end marker used to escape the CDATA declaration
 * @return  void
 */
private static function _escape_cdata_element($dom_document, $dom_element, $comment_start = '//', $comment_end = '')
{
	// Work on a snapshot: the loop below appends to and removes from the
	// element, and mutating a live node list while iterating it is fragile.
	$children = iterator_to_array($dom_element->childNodes, FALSE);

	foreach ($children as $node)
	{
		if ( ! ($node instanceof DOMCdataSection))
		{
			continue;
		}

		$embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
		$embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";

		// A literal "]]>" inside the data would terminate the wrapping CDATA
		// section early and make the fragment unparsable. Split it across two
		// adjacent CDATA sections instead.
		$data = str_replace(']]>', ']]]]><![CDATA[>', $node->data);

		$fragment = $dom_document->createDocumentFragment();

		// Only swap the nodes when the fragment parsed; otherwise keep the
		// original CDATA node rather than dropping its content.
		if ($fragment->appendXML($embed_prefix . $data . $embed_suffix))
		{
			$dom_element->appendChild($fragment);
			$dom_element->removeChild($node);
		}
	}
}

Source Code

/**
 * Records a link in the running link list and returns the display text
 * followed by a superscript reference number.
 *
 * - http://, https:// and mailto: links are listed as given.
 * - javascript: links are neither counted nor listed.
 * - Anything else is listed with URL::site(null, TRUE) in front, and a
 *   "/" is inserted when the link does not start with one.
 *
 * Updates self::$_link_count and self::$_link_list.
 *
 * @param   string  $link     Link target
 * @param   string  $display  Text shown for the link
 * @return  string
 */
private static function _links_list( $link, $display )
{
	// Links that already carry a scheme we list verbatim
	$is_absolute = (substr($link, 0, 7) == 'http://'
		OR substr($link, 0, 8) == 'https://'
		OR substr($link, 0, 7) == 'mailto:');

	if ( ! $is_absolute AND substr($link, 0, 11) == 'javascript:')
	{
		// Don't count the link; ignore it
		// what about href="#anchor" ?
		return $display . '';
	}

	self::$_link_count++;

	if ($is_absolute)
	{
		$entry = "[" . self::$_link_count . "] $link\n";
	}
	else
	{
		// Site-relative link: put the site URL in front
		$entry = "[" . self::$_link_count . "] " . URL::site(null, TRUE);

		if (substr($link, 0, 1) != '/')
		{
			$entry .= '/';
		}

		$entry .= "$link\n";
	}

	self::$_link_list .= $entry;

	return $display . ' <sup>[' . self::$_link_count . ']</sup>';
}

private static _replace_fraction( ) (defined in Gleez_Text)

Source Code

/**
 * Replaces every stand-alone occurrence of a textual fraction (e.g. "1/2")
 * with its HTML equivalent.
 *
 * A fraction is stand-alone when it is not directly preceded or followed by
 * an ASCII letter or digit. Zero-width assertions are used so the fraction
 * is also recognised at the very start or end of the text and when two
 * fractions are separated by a single character.
 *
 * $html_fraction is passed to preg_replace() as the replacement string, so
 * "$n" / "\n" sequences in it would be treated as backreferences.
 *
 * @param   string  $fraction       Plain-text fraction to look for
 * @param   string  $html_fraction  Replacement markup
 * @param   string  $text           Text to search
 * @return  string
 */
private static function _replace_fraction($fraction, $html_fraction, $text)
{
	// fraction can't be preceded or followed by a number or letter.
	$search = '/(?<![0-9A-Z])' . preg_quote($fraction, '/') . '(?![0-9A-Z])/i';

	return preg_replace($search, $html_fraction, $text);
}

private static dom_serialize( $dom_document $dom_document ) (defined in Gleez_Text)

Converts a DOM object back to an HTML snippet.

The function serializes the body part of a DOMDocument back to an XHTML snippet.

The resulting XHTML snippet will be properly formatted to be compatible with HTML user agents.

A DOMDocument object to serialize, only the tags below the first node will be converted.

A valid (X)HTML snippet, as a string.

Parameters

  • $dom_document $dom_document required

Return Values

Source Code

/**
 * Converts a DOM object back to an HTML snippet.
 *
 * Serializes the children of the document's first <body> element. CDATA
 * sections inside <script> and <style> elements are comment-escaped first
 * via self::_escape_cdata_element(), and self-closing tags in the result
 * are rewritten from "<tag/>" to "<tag />".
 *
 * @param   DOMDocument  $dom_document  Document to serialize; only the content of its body is used
 * @return  string  The serialized snippet, or an empty string when the document has no body
 */
private static function dom_serialize($dom_document)
{
	$body_node    = $dom_document->getElementsByTagName('body')->item(0);
	$body_content = '';

	// Nothing to serialize without a body element
	if ($body_node === NULL)
	{
		return $body_content;
	}

	// Script CDATA is hidden behind "//" line comments
	foreach ($body_node->getElementsByTagName('script') as $node)
	{
		self::_escape_cdata_element($dom_document, $node);
	}

	// Style CDATA is hidden behind "/* */" block comments
	foreach ($body_node->getElementsByTagName('style') as $node)
	{
		self::_escape_cdata_element($dom_document, $node, '/*', '*/');
	}

	foreach ($body_node->childNodes as $child_node)
	{
		$body_content .= $dom_document->saveXML($child_node);
	}

	// NOTE(review): this call was truncated to "e::debug(...)" in the
	// published listing; Message::debug is the presumed original - confirm.
	Message::debug($body_content);

	// Put a space before the slash of self-closing tags
	return preg_replace('|<([^> ]*)/>|i', '<$1 />', $body_content);
}
Documentation comments powered by Disqus