1 | // Copyright (C) 2009 Google Inc. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | package com.google.caja.util; |
16 | |
17 | import java.util.HashMap; |
18 | import java.util.Map; |
19 | import java.util.Set; |
20 | |
21 | /** |
22 | * A mapping from a group of strings with a particular format to suffixes that |
23 | * unambiguously identify the original. |
24 | * |
25 | * <p> |
26 | * If we needed to abbreviate the set of file paths: |
27 | * <ul> |
28 | * <li><tt>/tmp/foo/bar.txt</tt> |
29 | * <li><tt>/tmp/baz.txt</tt> |
30 | * <li><tt>/tmp/boo/bar.txt</tt> |
31 | * </ul> |
32 | * This class would come up with the mapping |
33 | * <ul> |
34 | * <li><tt>/tmp/foo/bar.txt</tt> → <tt>foo/bar.txt</tt> |
35 | * <li><tt>/tmp/baz.txt</tt> → <tt>baz.txt</tt> |
36 | * <li><tt>/tmp/boo/bar.txt</tt> → <tt>boo/bar.txt</tt> |
37 | * </ul> |
38 | * by choosing the shortest suffixes that start after the <code>/</code> |
39 | * separator for each item that are not suffixes of any other element in the |
40 | * set. |
41 | * |
42 | * @author mikesamuel@gmail.com |
43 | */ |
44 | public final class Abbreviator { |
45 | private final Map<String, String> itemToAbbrev = new HashMap<String, String>(); |
46 | private final String sep; |
47 | |
48 | /** |
49 | * @param items items to generate abbreviations for. |
50 | * @param sep a string that is likely to appear as a substring of an item |
51 | * repeatedly. A suitable sep for file paths or URIs might be {@code "/"}. |
52 | */ |
53 | public Abbreviator(Set<String> items, String sep) { |
54 | this.sep = sep; |
55 | Map<String, String> abbrevToUri = new HashMap<String, String>(); |
56 | for (String item : items) { insert(abbrevToUri, item, ""); } |
57 | for (Map.Entry<String, String> e : abbrevToUri.entrySet()) { |
58 | if (e.getValue() != null) { |
59 | itemToAbbrev.put(e.getValue(), e.getKey()); |
60 | } |
61 | } |
62 | } |
63 | |
64 | /** |
65 | * If item was in the input set, then the shortest unambiguous suffix of item |
66 | * that is not a suffix of any other item in the input set, or item if item |
67 | * was not in the input set. |
68 | */ |
69 | public String unambiguousAbbreviationFor(String item) { |
70 | String abbrev = itemToAbbrev.get(item); |
71 | return abbrev != null ? abbrev : item; |
72 | } |
73 | |
74 | private void insert( |
75 | Map<String, String> abbrevToUri, String item, String abbrev) { |
76 | abbrev = expand(item, abbrev); |
77 | if (!abbrevToUri.containsKey(abbrev)) { |
78 | abbrevToUri.put(abbrev, item); |
79 | } else { |
80 | String other = abbrevToUri.get(abbrev); |
81 | if (!item.equals(other)) { |
82 | if (!abbrev.equals(other)) { |
83 | abbrevToUri.put(abbrev, null); |
84 | if (other != null) { insert(abbrevToUri, other, abbrev); } |
85 | } |
86 | insert(abbrevToUri, item, abbrev); |
87 | } |
88 | } |
89 | } |
90 | |
91 | /** |
92 | * Returns a longer suffix of original. |
93 | * @param item a path like <tt>/a/b/c</tt>. |
94 | * @param abbrev a suffix of item like <tt>c</tt>. |
95 | * @return item or a suffix of item that is longer than abbrev and which |
96 | * starts after a '/' character in item. |
97 | */ |
98 | private String expand(String item, String abbrev) { |
99 | int seplen = sep.length(); |
100 | int suffixStart = item.length() - (abbrev.length() + 1); |
101 | int slash = item.lastIndexOf(sep, suffixStart - seplen); |
102 | return slash >= 0 ? item.substring(slash + seplen) : item; |
103 | } |
104 | } |