View Javadoc
1   package org.andromda.utils.inflector;
2   
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
5   
/**
 * Language utility for transforming English words between singular and plural form.
 * See also <a href="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html">
 * http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html</a>.
 * <p>
 * Inflection is driven by ordered tables of regular-expression rules: the first rule whose
 * pattern matches the <em>whole</em> word decides the result. All patterns are written in
 * lower case and matching is case-sensitive. The rule tables are compiled once when the class
 * is initialized and are never modified afterwards.
 *
 * @author maetl@coretxt.net.nz
 * @author wouter@andromda.org
 * @author bob@andromda.org
 */
public class EnglishInflector
{
    /** Ordered plural rules (pattern -> replacement), compiled once. */
    private static final Map<Pattern, String> PLURAL_RULES = compileRules(getPluralRules());

    /** Ordered singular rules (pattern -> replacement), compiled once. */
    private static final Map<Pattern, String> SINGULAR_RULES = compileRules(getSingularRules());

    /** Applied when no plural rule matches: append "s" to a word that does not end in "s". */
    private static final Pattern PLURAL_FALLBACK = Pattern.compile("([\\w]+)([^s])$");

    /** Applied when no singular rule matches: strip one trailing "s". */
    private static final Pattern SINGULAR_FALLBACK = Pattern.compile("([\\w]+)s$");

    /**
     * Converts an English word to plural form. Leaves it alone if already plural.
     *
     * @param word an English word
     * @return the pluralization of the argument English word, or the argument in case it is <code>null</code>
     */
    public static String pluralize(String word)
    {
        if (word == null)
        {
            return null;
        }

        final String inflected = applyRules(PLURAL_RULES, word);
        if (inflected != null)
        {
            return inflected;
        }
        // No rule matched: just add "s" at the end (no-op if the pattern does not match).
        return PLURAL_FALLBACK.matcher(word).replaceFirst("$1$2s");
    }

    /**
     * Returns the ordered map of plural patterns to their replacement strings.
     * Order matters: more specific rules must be registered before the generic
     * suffix rules that would otherwise match the same words.
     */
    private static Map<String, String> getPluralRules()
    {
        // Rules are checked in order through LinkedHashMap.
        final Map<String, String> rules = new LinkedHashMap<String, String>();
        // Check first if special case word is already plural (or uncountable)
        rules.put("(\\w*)people$", "$1people");
        rules.put("(\\w*)children$", "$1children");
        rules.put("(\\w*)feet$", "$1feet");
        rules.put("(\\w*)teeth$", "$1teeth");
        rules.put("(\\w*)men$", "$1men");
        rules.put("(\\w*)equipment$", "$1equipment");
        rules.put("(\\w*)information$", "$1information");
        rules.put("(\\w*)rice$", "$1rice");
        rules.put("(\\w*)money$", "$1money");
        // Was "$fish": an illegal group reference that made replaceFirst throw.
        rules.put("(\\w*)fish$", "$1fish");
        rules.put("(\\w*)sheep$", "$1sheep");
        rules.put("(\\w*)oxen$", "$1oxen");
        rules.put("(\\w+)(es)$", "$1es");
        // Check exception special case words
        rules.put("(\\w*)person$", "$1people");
        rules.put("(\\w*)child$", "$1children");
        rules.put("(\\w*)foot$", "$1feet");
        rules.put("(\\w*)tooth$", "$1teeth");
        rules.put("(\\w*)bus$", "$1buses");
        rules.put("(\\w*)man$", "$1men");
        // Only the exact word "ox" becomes "oxen"; other words ending in "ox"
        // (box, fox, ...) fall through to the generic "x" -> "xes" rule below.
        rules.put("ox$", "oxen");
        rules.put("(\\w*)(buffal|tomat)o$", "$1$2oes");
        // Was "$1$2zes": group 2 does not exist, which made replaceFirst throw.
        rules.put("(\\w*)quiz$", "$1quizzes");
        // Greek/Latin endings. The ix/ex alternative is grouped so that it binds to the stem.
        rules.put("(\\w*)(matr|vert|ind)(ix|ex)$", "$1$2ices");
        rules.put("(\\w+)(sis)$", "$1ses");
        rules.put("(\\w+)(um)$", "$1a");
        // Old English. hoof -> hooves, leaf -> leaves
        rules.put("(\\w*)(fe)$", "$1ves");
        rules.put("(\\w*)(f)$", "$1ves");
        rules.put("(\\w*)([ml])ouse$", "$1$2ice");
        // Y preceded by a consonant changes to ies
        rules.put("(\\w+)([^aeiou]|qu)y$", "$1$2ies");
        // Latin -us exceptions. These must precede the generic "us" -> "uses" rule,
        // otherwise they can never be reached.
        rules.put("(\\w*)cactus$", "$1cacti");
        rules.put("(\\w*)focus$", "$1foci");
        rules.put("(\\w*)fungus$", "$1fungi");
        rules.put("(\\w*)octopus$", "$1octopi");
        rules.put("(\\w*)radius$", "$1radii");
        // Sibilant endings add es instead of s
        rules.put("(\\w+)(z|ch|sh|as|ss|us|x)$", "$1$2es");
        // If nothing else matches, and word ends in s, assume plural already
        rules.put("(\\w+)(s)$", "$1s");
        // Otherwise, just add s at the end in pluralize()
        return rules;
    }

    /**
     * Converts an English word to singular form. Leaves it alone if already singular.
     * <p>
     * Known limitation: any word ending in "ses" that is not covered by an earlier rule is
     * treated as a Greek "-sis" plural, so for example "houses" becomes "housis".
     *
     * @param word an English word
     * @return the singularization of the argument English word, or the argument in case it is <code>null</code>
     */
    public static String singularize(String word)
    {
        if (word == null)
        {
            return null;
        }

        final String inflected = applyRules(SINGULAR_RULES, word);
        if (inflected != null)
        {
            return inflected;
        }
        // No rule matched: strip a trailing "s" (no-op if the word does not end in "s").
        return SINGULAR_FALLBACK.matcher(word).replaceFirst("$1");
    }

    /**
     * Returns the ordered map of singular patterns to their replacement strings.
     * Order matters: the first matching rule wins.
     */
    private static Map<String, String> getSingularRules()
    {
        final Map<String, String> rules = new LinkedHashMap<String, String>();
        // Irregular plurals
        rules.put("(\\w*)people$", "$1person");
        rules.put("(\\w*)children$", "$1child");
        rules.put("(\\w*)series$", "$1series");
        rules.put("(\\w*)feet$", "$1foot");
        rules.put("(\\w*)teeth$", "$1tooth");
        rules.put("(\\w*)buses$", "$1bus");
        rules.put("(\\w*)men$", "$1man");
        // Words that are already singular and must not lose a trailing letter
        rules.put("(\\w*)person$", "$1person");
        rules.put("(\\w*)child$", "$1child");
        rules.put("(\\w*)foot$", "$1foot");
        rules.put("(\\w*)tooth$", "$1tooth");
        rules.put("(\\w*)bus$", "$1bus");
        rules.put("(\\w*)man$", "$1man");
        rules.put("(\\w+)(sis)$", "$1sis");
        // class, address, ...: without this rule the fallback would strip the final "s".
        rules.put("(\\w+)ss$", "$1ss");
        rules.put("(\\w+)(l)f$", "$1$2f");
        rules.put("(\\w+)([^l])fe$", "$1$2fe");
        rules.put("(\\w*)quizzes$", "$1quiz");
        // Sibilant plurals. Must precede the "ses" -> "sis" rule so that
        // "classes" becomes "class" rather than "classis".
        rules.put("(\\w+)(z|ch|ss|sh|x)es$", "$1$2");
        rules.put("(\\w+)(ses)$", "$1sis");
        rules.put("(\\w+)(l)ves$", "$1$2f");
        rules.put("(\\w+)([^l])ves$", "$1$2fe");
        rules.put("(\\w+)([^aeiou])y$", "$1$2y");
        rules.put("(\\w+)([^aeiou])ies$", "$1$2y");
        return rules;
    }

    /**
     * Compiles the textual rules into patterns, preserving their order.
     *
     * @param source ordered map of regular expression to replacement string
     * @return unmodifiable ordered map of compiled pattern to replacement string
     */
    private static Map<Pattern, String> compileRules(final Map<String, String> source)
    {
        final Map<Pattern, String> compiled = new LinkedHashMap<Pattern, String>();
        for (final Map.Entry<String, String> rule : source.entrySet())
        {
            compiled.put(Pattern.compile(rule.getKey()), rule.getValue());
        }
        return Collections.unmodifiableMap(compiled);
    }

    /**
     * Applies the first rule whose pattern matches the entire word.
     *
     * @param rules ordered rules to try
     * @param word the word to inflect, never <code>null</code>
     * @return the inflected word, or <code>null</code> if no rule matched
     */
    private static String applyRules(final Map<Pattern, String> rules, final String word)
    {
        for (final Map.Entry<Pattern, String> rule : rules.entrySet())
        {
            final Matcher matcher = rule.getKey().matcher(word);
            if (matcher.matches())
            {
                return matcher.replaceFirst(rule.getValue());
            }
        }
        return null;
    }
}