-
Notifications
You must be signed in to change notification settings - Fork 26
Inflector
Replace the default CI Inflector helper with this more versatile inflector library.
I was looking for an inflector class to greatly improve the results of a search for a CI-built media e-commerce site similar to iStockPhoto. The results need to show the maximum number of images that are relevant based on the filtering criteria established by the user and/or defaults. Zend, Cake and other frameworks have very similar inflectors, which I’m sure borrowed from each other, but none of them fully covered the vocabulary. I feel this class now has enough rules and word lists in its arrays to handle the inflections.
Essentially, this inflection class has been able to produce more relevant search results based on the media’s tags and keywords. The CI inflector was just too simplistic and did not generate many inflections other than the plural “s”. So it solved a problem, and search performance has not suffered so far. I will look into ways (as suggested) to improve the performance, but anyone looking for an up-to-date inflector for any framework may want to use this class.
The example below only shows the inflector calls (plural or singular); how the results are used (for searching, in my case) depends on additional controller work.
<?php
// Stop with a message unless the BASEPATH constant is defined
// (presumably set by the framework's bootstrap; confirm against the front controller).
defined('BASEPATH') OR exit('No direct script access allowed');
/**
* Inflector Library for CodeIgniter
*
* Thanks to Akelos Framework, Zend and Ruby on Rails.
*
*
* @author Jason Hamilton-Mascioli <hamiltonmascioli at Gmail dot com>
* @copyright Copyright (c) 2008, Custom-Mod Solutions Inc., http://www.custom-mod.com
* @license GNU Lesser General Public License <http://www.gnu.org/copyleft/lesser.html>
* @since 0.1
* @version $Revision 0.1 $
*/
class Inflector
{
    /**
     * Pluralizes English nouns.
     *
     * The word (or, for a phrase, its last word) is checked against the
     * uncountable list, then the irregular table, then the ordered suffix
     * rules; the first hit wins. Words that already end in "s" are left
     * unchanged by the rules.
     *
     * @access public
     * @static
     * @param string $word English noun to pluralize
     * @return string Plural noun
     */
    function pluralize($word)
    {
        // Ordered suffix rules: first match wins, the final '/$/' simply appends "s".
        $rules = array(
            '/(s)tatus$/i' => '\1tatuses',
            '/(quiz)$/i' => '\1zes',
            '/^(ox)$/i' => '\1en',
            '/([ml])ouse$/i' => '\1ice',
            '/(matr|vert|ind)(ix|ex)$/i' => '\1ices',
            '/(x|ch|ss|sh)$/i' => '\1es',
            '/([^aeiouy]|qu)y$/i' => '\1ies',
            '/(hive)$/i' => '\1s',
            // Only one of the two groups participates; the other expands to ''.
            '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
            '/sis$/i' => 'ses',
            '/([ti])um$/i' => '\1a',
            '/(p)erson$/i' => '\1eople',
            '/(m)an$/i' => '\1en',
            '/(c)hild$/i' => '\1hildren',
            '/(buffal|tomat)o$/i' => '\1oes',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
            '/us$/' => 'uses',
            '/(alias)$/i' => '\1es',
            '/(ax|cri|test)is$/i' => '\1es',
            '/s$/' => 's',
            '/^$/' => '',
            '/$/' => 's'
        );

        $uncountable = Inflector::_uncountable();
        $uncountable[] = 'People';

        // Scoped calls (instead of $this->) keep legacy static invocations
        // working on PHP versions that still permit them.
        return Inflector::_inflect($word, $uncountable, Inflector::_irregular(), $rules);
    }

    /**
     * Singularizes English nouns.
     *
     * Same lookup order as pluralize(): uncountable list, irregular table
     * (plural => singular), then the ordered suffix rules. A word that
     * matches nothing is returned unchanged.
     *
     * @access public
     * @static
     * @param string $word English noun to singularize
     * @return string Singular noun.
     */
    function singularize($word)
    {
        // Ordered suffix rules: first match wins, the final rule strips a trailing "s".
        $rules = array(
            '/(s)tatuses$/i' => '\1tatus',
            '/^(.*)(menu)s$/i' => '\1\2',
            '/(quiz)zes$/i' => '\1',
            '/(matr)ices$/i' => '\1ix',
            '/(vert|ind)ices$/i' => '\1ex',
            '/^(ox)en$/i' => '\1',
            '/(alias)(es)*$/i' => '\1',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
            '/(cris|ax|test)es$/i' => '\1is',
            '/(shoe)s$/i' => '\1',
            '/(o)es$/i' => '\1',
            '/ouses$/' => 'ouse',
            '/uses$/' => 'us',
            '/([ml])ice$/i' => '\1ouse',
            '/(x|ch|ss|sh)es$/i' => '\1',
            '/(m)ovies$/i' => '\1ovie',
            '/(s)eries$/i' => '\1eries',
            '/([^aeiouy]|qu)ies$/i' => '\1y',
            '/([lr])ves$/i' => '\1f',
            '/(tive)s$/i' => '\1',
            '/(hive)s$/i' => '\1',
            '/(drive)s$/i' => '\1',
            '/([^fo])ves$/i' => '\1fe',
            '/(^analy)ses$/i' => '\1sis',
            // Only \1 is used: the inner single-letter groups would duplicate a
            // letter (e.g. "psychoanalyasis") if they were echoed as well.
            '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '\1sis',
            '/([ti])a$/i' => '\1um',
            '/(p)eople$/i' => '\1erson',
            '/(m)en$/i' => '\1an',
            '/(c)hildren$/i' => '\1hild',
            '/(n)ews$/i' => '\1ews',
            '/^(.*us)$/' => '\1',
            '/s$/i' => ''
        );

        // Anything ending in "ss" (glass, dress) is already singular.
        $uncountable = Inflector::_uncountable();
        $uncountable[] = '.*ss';

        // The irregular table is stored singular => plural; flip it for this direction.
        $irregular = array_flip(Inflector::_irregular());

        return Inflector::_inflect($word, $uncountable, $irregular, $rules);
    }

    /**
     * Shared lookup used by pluralize() and singularize().
     *
     * @access private
     * @param string $word        Word or phrase to inflect
     * @param array  $uncountable Regex fragments for words that never change
     * @param array  $irregular   Map of source form => target form (plain words)
     * @param array  $rules       Ordered map of regex => replacement
     * @return string Inflected word; the input itself when nothing applies
     */
    function _inflect($word, $uncountable, $irregular, $rules)
    {
        $word = (string) $word;

        // Uncountable entries are regex fragments ('.*fish', 'sea[- ]bass'), so they
        // are matched as patterns, case-insensitively, against the last whole word.
        // A literal suffix comparison would ignore the patterns and would also treat
        // "price" as "rice".
        if (preg_match('/\b(?:' . implode('|', $uncountable) . ')$/i', $word)) {
            return $word;
        }

        // Irregular forms must match a whole trailing word: "ox" must not hit "box".
        // Compounds such as "woman" or "grandchild" are covered by the suffix rules.
        foreach ($irregular as $from => $to) {
            if (preg_match('/\b' . preg_quote($from, '/') . '$/i', $word, $match)) {
                $stem = substr($word, 0, strlen($word) - strlen($match[0]));
                // Keep the caller's first letter so "Person" becomes "People".
                return $stem . substr($match[0], 0, 1) . substr($to, 1);
            }
        }

        foreach ($rules as $pattern => $replacement) {
            if (preg_match($pattern, $word)) {
                return preg_replace($pattern, $replacement, $word);
            }
        }

        return $word;
    }

    /**
     * Regex fragments for nouns whose singular and plural forms are identical.
     *
     * @access private
     * @return array
     */
    function _uncountable()
    {
        return array(
            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep',
            'Amoyese', 'bison', 'Borghese', 'bream', 'breeches', 'britches', 'buffalo',
            'cantus', 'carp', 'chassis', 'clippers', 'cod', 'coitus', 'Congoese',
            'contretemps', 'corps', 'debris', 'diabetes', 'djinn', 'eland', 'elk',
            'equipment', 'Faroese', 'flounder', 'Foochowese', 'gallows', 'Genevese',
            'Genoese', 'Gilbertese', 'graffiti', 'headquarters', 'herpes', 'hijinks',
            'Hottentotese', 'information', 'innings', 'jackanapes', 'Kiplingese',
            'Kongoese', 'Lucchese', 'mackerel', 'Maltese', 'media', 'mews', 'moose',
            'mumps', 'Nankingese', 'news', 'nexus', 'Niasese', 'Pekingese',
            'Piedmontese', 'pincers', 'Pistoiese', 'pliers', 'Portuguese',
            'proceedings', 'rabies', 'rice', 'rhinoceros', 'salmon', 'Sarawakese',
            'scissors', 'sea[- ]bass', 'series', 'Shavese', 'shears', 'siemens',
            'species', 'swine', 'testes', 'trousers', 'trout', 'tuna', 'Vermontese',
            'Wenchowese', 'whiting', 'wildebeest', 'Yengeese'
        );
    }

    /**
     * Irregular nouns, keyed singular => plural.
     *
     * @access private
     * @return array
     */
    function _irregular()
    {
        return array(
            'atlas' => 'atlases',
            'beef' => 'beefs',
            'brother' => 'brothers',
            'child' => 'children',
            'corpus' => 'corpuses',
            'cow' => 'cows',
            'ganglion' => 'ganglions',
            'genie' => 'genies',
            'genus' => 'genera',
            'graffito' => 'graffiti',
            'hoof' => 'hoofs',
            'loaf' => 'loaves',
            'man' => 'men',
            'money' => 'monies',
            'mongoose' => 'mongooses',
            'move' => 'moves',
            'mythos' => 'mythoi',
            'numen' => 'numina',
            'occiput' => 'occiputs',
            'octopus' => 'octopuses',
            'opus' => 'opuses',
            'ox' => 'oxen',
            'penis' => 'penises',
            'person' => 'people',
            'sex' => 'sexes',
            'soliloquy' => 'soliloquies',
            'testis' => 'testes',
            'trilby' => 'trilbys',
            'turf' => 'turfs'
        );
    }
}
?>
Example Usage
// Load the library; the calls below then reach it through $this->inflector
$this->load->library('Inflector');
// Pluralize the keyword
$plural = $this->inflector->pluralize($keyword);
// Singularize the keyword
$singular = $this->inflector->singularize($keyword);