001package org.andromda.utils.inflector;
002
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
005
/**
 * Language utility for transforming English words between singular and plural form.
 * See also <a href="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html">
 * http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html</a>
 * <p>
 * Inflection is driven by two ordered tables of regular-expression rules. The first rule whose
 * pattern matches the <em>entire</em> word wins; when no rule matches, a generic fallback adds or
 * strips a trailing "s". All patterns are written in lower case and are matched case-sensitively.
 * The rules are heuristics based on word endings, so unusual words may be inflected incorrectly.
 * </p>
 * <p>
 * The rule tables are compiled once when the class is initialized and are never modified afterwards.
 * </p>
 *
 * @author maetl@coretxt.net.nz
 * @author wouter@andromda.org
 * @author bob@andromda.org
 */
public class EnglishInflector
{
    /** Compiled pluralization rules, in the order in which they are tried. */
    private static final Map<Pattern, String> PLURAL_RULES = compile(getPluralRules());

    /** Compiled singularization rules, in the order in which they are tried. */
    private static final Map<Pattern, String> SINGULAR_RULES = compile(getSingularRules());

    /** Used when no plural rule matches: appends "s" to a word that does not end in "s". */
    private static final Pattern PLURAL_FALLBACK = Pattern.compile("([\\w]+)([^s])$");

    /** Used when no singular rule matches: strips one trailing "s". */
    private static final Pattern SINGULAR_FALLBACK = Pattern.compile("([\\w]+)s$");

    /**
     * Converts an English word to plural form. Leaves it alone if it is recognized as already plural.
     *
     * @param word an English word
     * @return the pluralization of the argument English word, or the argument in case it is <code>null</code>
     */
    public static String pluralize(String word)
    {
        if (word == null)
        {
            return null;
        }

        final String plural = applyFirstMatch(PLURAL_RULES, word);
        if (plural != null)
        {
            return plural;
        }
        // No rule matched, so the word does not end in "s" (the last rule catches those): just add "s"
        return PLURAL_FALLBACK.matcher(word).replaceFirst("$1$2s");
    }

    /**
     * Returns the plural rules as a map of regular expression to replacement string.
     * Iteration order of the returned map is the order in which the rules must be tried.
     */
    private static Map<String, String> getPluralRules()
    {
        // Rules are checked in order through LinkedHashMap.
        final Map<String, String> rules = new LinkedHashMap<String, String>();
        // Check first if special case word is already plural (or has no distinct plural)
        rules.put("(\\w*)people$", "$1people");
        rules.put("(\\w*)children$", "$1children");
        rules.put("(\\w*)feet$", "$1feet");
        rules.put("(\\w*)teeth$", "$1teeth");
        rules.put("(\\w*)men$", "$1men");
        rules.put("(\\w*)equipment$", "$1equipment");
        rules.put("(\\w*)information$", "$1information");
        rules.put("(\\w*)rice$", "$1rice");
        rules.put("(\\w*)money$", "$1money");
        // Was "$fish": an illegal group reference that made every word ending in "fish" throw
        rules.put("(\\w*)fish$", "$1fish");
        rules.put("(\\w*)sheep$", "$1sheep");
        rules.put("(\\w*)oxen$", "$1oxen");
        rules.put("(\\w*)(cact|foc|fung|octop|radi)i$", "$1$2i");
        rules.put("(\\w+)(es)$", "$1es");
        // Check exception special case words
        rules.put("(\\w*)person$", "$1people");
        rules.put("(\\w*)child$", "$1children");
        rules.put("(\\w*)foot$", "$1feet");
        rules.put("(\\w*)tooth$", "$1teeth");
        rules.put("(\\w*)bus$", "$1buses");
        rules.put("(\\w*)man$", "$1men");
        // Only the word "ox" itself: a suffix match would also swallow "box", "fox", ...
        rules.put("ox", "oxen");
        rules.put("(\\w*)(buffal|tomat)o$", "$1$2oes");
        // Was "$1$2zes": group 2 does not exist in this pattern, so the replacement threw
        rules.put("(\\w*)quiz$", "$1quizzes");
        // These must precede the generic "-us" rule below, otherwise they can never match
        rules.put("(\\w*)cactus$", "$1cacti");
        rules.put("(\\w*)focus$", "$1foci");
        rules.put("(\\w*)fungus$", "$1fungi");
        rules.put("(\\w*)octopus$", "$1octopi");
        rules.put("(\\w*)radius$", "$1radii");
        // Greek endings. The ix|ex alternation is grouped so that it only applies to the suffix.
        rules.put("(\\w*)(matr|vert|ind)(?:ix|ex)$", "$1$2ices");
        rules.put("(\\w+)(sis)$", "$1ses");
        rules.put("(\\w+)(um)$", "$1a");
        // Old English. hoof -> hooves, leaf -> leaves
        rules.put("(\\w*)(fe)$", "$1ves");
        rules.put("(\\w*)(f)$", "$1ves");
        rules.put("(\\w*)([ml])ouse$", "$1$2ice");
        // Y preceded by a consonant changes to ies
        rules.put("(\\w+)([^aeiou]|qu)y$", "$1$2ies");
        // Voiced consonants add es instead of s
        rules.put("(\\w+)(z|ch|sh|as|ss|us|x)$", "$1$2es");
        // If nothing else matches, and word ends in s, assume plural already
        rules.put("(\\w+)(s)$", "$1s");
        // Otherwise, just add s at the end in pluralize()
        return rules;
    }

    /**
     * Converts an English word to singular form. Leaves it alone if it is recognized as already singular.
     *
     * @param word an English word
     * @return the singularization of the argument English word, or the argument in case it is <code>null</code>
     */
    public static String singularize(String word)
    {
        if (word == null)
        {
            return null;
        }

        final String singular = applyFirstMatch(SINGULAR_RULES, word);
        if (singular != null)
        {
            return singular;
        }
        // No rule matched: strip a trailing "s" if there is one
        return SINGULAR_FALLBACK.matcher(word).replaceFirst("$1");
    }

    /**
     * Returns the singular rules as a map of regular expression to replacement string.
     * Iteration order of the returned map is the order in which the rules must be tried.
     */
    private static Map<String, String> getSingularRules()
    {
        // Rules are checked in order through LinkedHashMap.
        final Map<String, String> rules = new LinkedHashMap<String, String>();
        // Irregular plurals
        rules.put("(\\w*)people$", "$1person");
        rules.put("(\\w*)children$", "$1child");
        rules.put("(\\w*)series$", "$1series");
        rules.put("(\\w*)feet$", "$1foot");
        rules.put("(\\w*)teeth$", "$1tooth");
        rules.put("(\\w*)buses$", "$1bus");
        rules.put("(\\w*)men$", "$1man");
        // Reverse of the irregular plurals produced by pluralize()
        rules.put("(\\w*)oxen$", "$1ox");
        rules.put("(\\w*)quizzes$", "$1quiz");
        rules.put("(\\w*)(cact|foc|fung|octop|radi)i$", "$1$2us");
        rules.put("(\\w*)matrices$", "$1matrix");
        rules.put("(\\w*)(vert|ind)ices$", "$1$2ex");
        // Words that are already singular
        rules.put("(\\w*)person$", "$1person");
        rules.put("(\\w*)child$", "$1child");
        rules.put("(\\w*)foot$", "$1foot");
        rules.put("(\\w*)tooth$", "$1tooth");
        rules.put("(\\w*)bus$", "$1bus");
        rules.put("(\\w*)man$", "$1man");
        rules.put("(\\w+)(sis)$", "$1sis");
        rules.put("(\\w+)(l)f$", "$1$2f");
        rules.put("(\\w+)([^l])fe$", "$1$2fe");
        // Must precede the "ses" rule: "classes" also ends in "ses" but is class + es, not a -sis word
        rules.put("(\\w+)(z|ch|ss|sh|x)es$", "$1$2");
        rules.put("(\\w+)(ses)$", "$1sis");
        // shelves -> shelf, knives -> knife
        rules.put("(\\w+)(l)ves$", "$1$2f");
        rules.put("(\\w+)([^l])ves$", "$1$2fe");
        rules.put("(\\w+)([^aeiou])y$", "$1$2y");
        rules.put("(\\w+)([^aeiou])ies$", "$1$2y");
        // Otherwise, just strip a trailing s in singularize()
        return rules;
    }

    /**
     * Compiles the regular expressions of a rule table, preserving the rule order.
     *
     * @param rules map of regular expression to replacement string
     * @return map of compiled pattern to replacement string, in the same iteration order
     */
    private static Map<Pattern, String> compile(final Map<String, String> rules)
    {
        final Map<Pattern, String> compiled = new LinkedHashMap<Pattern, String>();
        for (final Map.Entry<String, String> rule : rules.entrySet())
        {
            compiled.put(Pattern.compile(rule.getKey()), rule.getValue());
        }
        return compiled;
    }

    /**
     * Applies the first rule whose pattern matches the whole word.
     *
     * @param rules the compiled rules, tried in iteration order
     * @param word the word to transform, must not be <code>null</code>
     * @return the transformed word, or <code>null</code> if no rule matches
     */
    private static String applyFirstMatch(final Map<Pattern, String> rules, final String word)
    {
        for (final Map.Entry<Pattern, String> rule : rules.entrySet())
        {
            final Matcher matcher = rule.getKey().matcher(word);
            if (matcher.matches())
            {
                // replaceFirst() resets the matcher before searching, so reusing it is safe
                return matcher.replaceFirst(rule.getValue());
            }
        }
        return null;
    }
}