// EnglishInflector.java
package org.andromda.utils.inflector;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Language utility for transforming English words between singular and plural form.
 * <p>
 * Both directions work the same way: an ordered table of (pattern, replacement) rules is walked
 * from top to bottom and the first rule whose pattern matches the <em>whole</em> word decides the
 * result. If no rule matches, a generic fallback adds (or strips) a trailing "s".
 * <p>
 * All patterns are written in lower case and are matched case-sensitively, so an irregular ending
 * is only recognised when it is spelled in lower case (for example "salesperson", but not "Person").
 * The rule tables are compiled once when the class is initialised and are never modified afterwards.
 * <p>
 * See also <a href="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html">
 * http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html</a>
 *
 * @author maetl@coretxt.net.nz
 * @author wouter@andromda.org
 * @author bob@andromda.org
 */
public class EnglishInflector
{
    /**
     * Stems of the nouns whose singular ends in "f" and whose plural ends in "ves"
     * (calf, dwarf, elf/self/shelf, half, hoof, leaf, loaf, scarf, sheaf, thief, wharf, wolf).
     * An explicit list is used because most words in "-f"/"-ves" are regular
     * (roof/roofs, staff/staffs, archive/archives, valve/valves).
     */
    private static final String F_TO_VES_STEMS = "cal|dwar|el|hal|hoo|lea|loa|scar|shea|thie|whar|wol";

    /** Ordered, precompiled rules used by {@link #pluralize(String)}. */
    private static final Map<Pattern, String> PLURAL_RULES = getPluralRules();

    /** Fallback for {@link #pluralize(String)}: append "s" to a word that does not end in "s". */
    private static final Pattern PLURAL_FALLBACK = Pattern.compile("([\\w]+)([^s])$");

    /** Ordered, precompiled rules used by {@link #singularize(String)}. */
    private static final Map<Pattern, String> SINGULAR_RULES = getSingularRules();

    /** Fallback for {@link #singularize(String)}: strip one trailing "s". */
    private static final Pattern SINGULAR_FALLBACK = Pattern.compile("([\\w]+)s$");

    /**
     * Converts an English word to plural form. Leaves it alone if already plural.
     *
     * @param word an English word
     * @return the pluralization of the argument English word, or the argument in case it is <code>null</code>
     */
    public static String pluralize(String word)
    {
        if (word == null)
        {
            return null;
        }
        return applyRules(word, PLURAL_RULES, PLURAL_FALLBACK, "$1$2s");
    }

    /**
     * Converts an English word to singular form. Leaves it alone if already singular.
     *
     * @param word an English word
     * @return the singularization of the argument English word, or the argument in case it is <code>null</code>
     */
    public static String singularize(String word)
    {
        if (word == null)
        {
            return null;
        }
        return applyRules(word, SINGULAR_RULES, SINGULAR_FALLBACK, "$1");
    }

    /**
     * Applies the first rule that matches the complete word, or the fallback when none does.
     *
     * @param word the non-null word to transform
     * @param rules ordered rule table; iteration order is the rule priority
     * @param fallback pattern applied when no rule matches the whole word
     * @param fallbackReplacement replacement template used together with <code>fallback</code>
     * @return the transformed word; the unchanged word if neither a rule nor the fallback applies
     */
    private static String applyRules(
        final String word,
        final Map<Pattern, String> rules,
        final Pattern fallback,
        final String fallbackReplacement)
    {
        for (final Map.Entry<Pattern, String> rule : rules.entrySet())
        {
            final Matcher matcher = rule.getKey().matcher(word);
            if (matcher.matches())
            {
                // Every rule pattern is anchored with '$', so the first match found by
                // replaceFirst is the same whole-word match that matches() just confirmed.
                return matcher.replaceFirst(rule.getValue());
            }
        }
        return fallback.matcher(word).replaceFirst(fallbackReplacement);
    }

    /**
     * Compiles a rule and appends it to the end of the given rule table.
     */
    private static void addRule(final Map<Pattern, String> rules, final String regex, final String replacement)
    {
        rules.put(Pattern.compile(regex), replacement);
    }

    /**
     * Builds the ordered table of plural rules.
     *
     * @return the rules in priority order (first match wins)
     */
    private static Map<Pattern, String> getPluralRules()
    {
        // LinkedHashMap keeps insertion order, which is the rule priority.
        final Map<Pattern, String> rules = new LinkedHashMap<Pattern, String>();

        // Words that are already plural, or that have no distinct plural, stay as they are.
        addRule(rules, "(\\w*)people$", "$1people");
        addRule(rules, "(\\w*)children$", "$1children");
        addRule(rules, "(\\w*)feet$", "$1feet");
        addRule(rules, "(\\w*)teeth$", "$1teeth");
        addRule(rules, "(\\w*)men$", "$1men");
        addRule(rules, "(\\w*)equipment$", "$1equipment");
        addRule(rules, "(\\w*)information$", "$1information");
        addRule(rules, "(\\w*)rice$", "$1rice");
        addRule(rules, "(\\w*)money$", "$1money");
        addRule(rules, "(\\w*)fish$", "$1fish");
        addRule(rules, "(\\w*)sheep$", "$1sheep");
        addRule(rules, "(\\w*)oxen$", "$1oxen");
        addRule(rules, "(\\w*)(cacti|foci|fungi|octopi|radii)$", "$1$2");
        addRule(rules, "(\\w+)(es)$", "$1es");

        // Irregular nouns.
        addRule(rules, "(\\w*)person$", "$1people");
        addRule(rules, "(\\w*)child$", "$1children");
        addRule(rules, "(\\w*)foot$", "$1feet");
        addRule(rules, "(\\w*)tooth$", "$1teeth");
        addRule(rules, "(\\w*)bus$", "$1buses");
        addRule(rules, "(\\w*)man$", "$1men");
        // Only the bare word: "box", "fox" etc. must reach the generic "x -> xes" rule below.
        addRule(rules, "ox$", "oxen");
        addRule(rules, "(\\w*)(buffal|tomat)o$", "$1$2oes");
        addRule(rules, "(\\w*)quiz$", "$1quizzes");

        // Greek and Latin endings.
        addRule(rules, "(\\w*)(matr|vert|ind)(ix|ex)$", "$1$2ices");
        addRule(rules, "(\\w+)(sis)$", "$1ses");
        addRule(rules, "(\\w+)(um)$", "$1a");

        // Old English: knife -> knives, hoof -> hooves, leaf -> leaves.
        addRule(rules, "(\\w*)(kn|w|l)ife$", "$1$2ives");
        addRule(rules, "(\\w*)(" + F_TO_VES_STEMS + ")f$", "$1$2ves");
        addRule(rules, "(\\w*)([ml])ouse$", "$1$2ice");

        // Y preceded by a consonant changes to ies.
        addRule(rules, "(\\w+)([^aeiou]|qu)y$", "$1$2ies");

        // Latin "-us" nouns; must precede the generic "us -> uses" rule, which would shadow them.
        addRule(rules, "(\\w*)(cact|foc|fung|octop|radi)us$", "$1$2i");

        // Sibilant endings add es instead of s.
        addRule(rules, "(\\w+)(z|ch|sh|as|ss|us|x)$", "$1$2es");

        // If nothing else matches and the word ends in s, assume it is plural already.
        addRule(rules, "(\\w+)(s)$", "$1s");

        // Otherwise pluralize() just appends "s" through its fallback pattern.
        return rules;
    }

    /**
     * Builds the ordered table of singular rules.
     * <p>
     * Words that do not end in "s" need no rule of their own: the fallback in
     * {@link #singularize(String)} only strips a trailing "s" and leaves everything else alone.
     *
     * @return the rules in priority order (first match wins)
     */
    private static Map<Pattern, String> getSingularRules()
    {
        final Map<Pattern, String> rules = new LinkedHashMap<Pattern, String>();

        // Irregular plurals.
        addRule(rules, "(\\w*)people$", "$1person");
        addRule(rules, "(\\w*)children$", "$1child");
        addRule(rules, "(\\w*)series$", "$1series");
        addRule(rules, "(\\w*)feet$", "$1foot");
        addRule(rules, "(\\w*)teeth$", "$1tooth");
        addRule(rules, "(\\w*)buses$", "$1bus");
        addRule(rules, "(\\w*)men$", "$1man");

        // Already singular; listed so that e.g. "bus" and "analysis" do not lose their final "s".
        addRule(rules, "(\\w*)(person|child|foot|tooth|bus|man)$", "$1$2");
        addRule(rules, "(\\w+)(sis)$", "$1sis");

        // Greek "-ses" plurals. Restricted to known stems: a blanket "ses -> sis" rule would turn
        // regular plurals such as "cases" or "classes" into "casis" and "classis".
        addRule(rules, "(\\w*)(analy|cri|diagno|empha|oa|progno|synop|the)ses$", "$1$2sis");
        // Whole word only, so that "databases" still becomes "database".
        addRule(rules, "bases$", "basis");

        // Sibilant endings drop the whole "es".
        addRule(rules, "(\\w+)(z|ch|ss|sh|x)es$", "$1$2");

        // Old English "-ves" plurals, again limited to known words so that regular plurals such as
        // "archives", "drives" or "valves" fall through to the plain "strip the s" fallback.
        addRule(rules, "(\\w*)(kn|w)ives$", "$1$2ife");
        // Whole word only, so that "olives" still becomes "olive".
        addRule(rules, "lives$", "life");
        addRule(rules, "(\\w*)(" + F_TO_VES_STEMS + ")ves$", "$1$2f");

        // Consonant + "ies" goes back to consonant + "y".
        addRule(rules, "(\\w+)([^aeiou])ies$", "$1$2y");

        return rules;
    }
}