/* * Copyright (C) 2009 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.common.net; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import com.google.common.annotations.Beta; import com.google.common.annotations.GwtCompatible; import com.google.common.base.Ascii; import com.google.common.base.CharMatcher; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import com.google.thirdparty.publicsuffix.PublicSuffixPatterns; import java.util.List; import javax.annotation.Nullable; /** * An immutable well-formed internet domain name, such as {@code com} or {@code * foo.co.uk}. Only syntactic analysis is performed; no DNS lookups or other * network interactions take place. Thus there is no guarantee that the domain * actually exists on the internet. * *
One common use of this class is to determine whether a given string is * likely to represent an addressable domain on the web -- that is, for a * candidate string {@code "xxx"}, might browsing to {@code "http://xxx/"} * result in a webpage being displayed? In the past, this test was frequently * done by determining whether the domain ended with a {@linkplain * #isPublicSuffix() public suffix} but was not itself a public suffix. However, * this test is no longer accurate. There are many domains which are both public * suffixes and addressable as hosts; {@code "uk.com"} is one example. As a * result, the only useful test to determine if a domain is a plausible web host * is {@link #hasPublicSuffix()}. This will return {@code true} for many domains * which (currently) are not hosts, such as {@code "com"}, but given that any * public suffix may become a host without warning, it is better to err on the * side of permissiveness and thus avoid spurious rejection of valid sites. * *
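<p>During construction, names are normalized in two ways:
 * <ol>
 * <li>ASCII uppercase characters are converted to lowercase.
 * <li>Unicode dot separators other than the ASCII period ({@code '.'}) are
 * converted to the ASCII period.
 * </ol>
 *
 * <p>The normalized values will be returned from {@link #toString()} and
 * {@link #parts()}, and will be reflected in the result of
 * {@link #equals(Object)}.
 *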
* Internationalized domain names such as {@code 网络.cn} are supported, as
* are the equivalent IDNA
* Punycode-encoded versions.
*
* @author Craig Berry
* @since 5.0
*/
@Beta
@GwtCompatible
public final class InternetDomainName {
/**
 * Matches every character treated as a dot: the ASCII period plus U+3002
 * (ideographic full stop), U+FF0E (fullwidth full stop) and U+FF61
 * (halfwidth ideographic full stop).
 * NOTE(review): presumably used to normalize separators to the ASCII period
 * during construction; that code is not visible here -- confirm.
 */
private static final CharMatcher DOTS_MATCHER =
CharMatcher.anyOf(".\u3002\uFF0E\uFF61");
/**
 * Splits text on the ASCII period.
 */
private static final Splitter DOT_SPLITTER = Splitter.on('.');
/**
 * Joins parts with the ASCII period; {@link #ancestor(int)} uses it to
 * rebuild a shortened name.
 */
private static final Joiner DOT_JOINER = Joiner.on('.');
/**
 * Value of {@link #publicSuffixIndex} which indicates that no public suffix
 * was found.
 */
private static final int NO_PUBLIC_SUFFIX_FOUND = -1;
/**
 * Regular expression matching one literal ASCII period, for use with
 * {@link String#split(String, int)} in
 * {@link #matchesWildcardPublicSuffix(String)}.
 */
private static final String DOT_REGEX = "\\.";
/**
 * Maximum parts (labels) in a domain name. This value arises from combining
 * the 255-octet limit described in
 * <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11 with
 * the fact that the encoding of each part occupies at least two bytes
 * (dot plus label externally, length byte plus label internally). Thus, if
 * all labels have the minimum size of one byte, 127 of them will fit.
 */
private static final int MAX_PARTS = 127;
/**
 * Maximum length of a full domain name, including separators, and
 * leaving room for the root label. See
 * <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11.
 */
private static final int MAX_LENGTH = 253;
/**
 * Maximum size of a single part of a domain name. See
 * <a href="http://www.ietf.org/rfc/rfc2181.txt">RFC 2181</a> part 11.
 */
private static final int MAX_DOMAIN_PART_LENGTH = 63;
/**
 * The full domain name, converted to lower case.
 */
private final String name;
/**
* The parts of the domain name, converted to lower case.
*/
private final ImmutableList ASCII characters in the part are expected to be valid per RFC 1035,
* with underscore also being allowed due to widespread practice.
*/
String asciiChars = CharMatcher.ASCII.retainFrom(part);
if (!PART_CHAR_MATCHER.matchesAllOf(asciiChars)) {
return false;
}
// No initial or final dashes or underscores.
if (DASH_MATCHER.matches(part.charAt(0))
|| DASH_MATCHER.matches(part.charAt(part.length() - 1))) {
return false;
}
/*
* Note that we allow (in contravention of a strict interpretation of the
* relevant RFCs) domain parts other than the last to begin with a digit
* (for example, "3com.com"). It's important to disallow an initial digit in
* the last part; it's the only thing that stops an IPv4 numeric address
* like 127.0.0.1 from looking like a valid domain name.
*/
if (isFinalPart && CharMatcher.DIGIT.matches(part.charAt(0))) {
return false;
}
return true;
}
/**
* Returns the individual components of this domain name, normalized to all
* lower case. For example, for the domain name {@code mail.google.com}, this
* method returns the list {@code ["mail", "google", "com"]}.
*/
public ImmutableList Warning: a {@code false} result from this method does not imply
* that the domain does not represent an addressable host, as many public
* suffixes are also addressable hosts. Use {@link #hasPublicSuffix()} for
* that test.
*
* This method can be used to determine whether it will probably be
* possible to set cookies on the domain, though even that depends on
* individual browsers' implementations of cookie controls. See
* RFC 2109 for details.
*
* @since 6.0
*/
public boolean isUnderPublicSuffix() {
  // Only a strictly positive index qualifies; both 0 and the negative
  // NO_PUBLIC_SUFFIX_FOUND sentinel are rejected by this comparison.
  final int suffixStart = publicSuffixIndex;
  return suffixStart > 0;
}
/**
 * Reports whether this domain name is made up of exactly one subdomain
 * component placed in front of a {@linkplain #isPublicSuffix() public
 * suffix}. The result is {@code true} for {@code google.com} and
 * {@code foo.co.uk}, and {@code false} for {@code www.google.com} and
 * {@code co.uk}.
 *
 * <p><b>Warning:</b> a {@code true} result does not mean that the domain is
 * the highest level addressable as a host, because many public suffixes are
 * addressable hosts themselves. For instance, {@code bar.uk.com} has the
 * public suffix {@code uk.com} and therefore yields {@code true} here, yet
 * {@code uk.com} is an addressable host in its own right.
 *
 * <p>The method is suitable for judging whether a domain is probably the
 * highest level at which cookies may be set, although even that depends on
 * how individual browsers implement cookie controls. See
 * <a href="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a> for details.
 *
 * @since 6.0
 */
public boolean isTopPrivateDomain() {
  // True exactly when the recorded public suffix index is 1.
  final int suffixStart = publicSuffixIndex;
  return suffixStart == 1;
}
/**
 * Returns the part of this domain name that sits one level below the public
 * suffix. Given {@code x.adwords.google.co.uk}, the result is
 * {@code google.co.uk}, because {@code co.uk} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is
 * returned.
 *
 * <p>Do not use this method to find the topmost parent domain that is
 * addressable as a host: many public suffixes are addressable hosts too.
 * For instance, {@code foo.bar.uk.com} has the public suffix {@code uk.com}
 * and so yields {@code bar.uk.com}, yet {@code uk.com} is itself an
 * addressable host.
 *
 * <p>The method is suitable for finding the probable highest-level parent
 * domain for which cookies may be set, although even that depends on how
 * individual browsers implement cookie controls.
 *
 * @throws IllegalStateException if this domain does not end with a
 *     public suffix
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    // Strip every leading part except the one directly before the suffix.
    final int levelsToDrop = publicSuffixIndex - 1;
    return ancestor(levelsToDrop);
  }
  return this;
}
/**
 * Reports whether this domain name consists of at least two parts.
 */
public boolean hasParent() {
  final int partCount = parts.size();
  return partCount > 1;
}
/**
 * Returns the {@code InternetDomainName} directly above this one, i.e. this
 * domain without its leftmost part. The parent of {@code www.google.com},
 * for instance, is {@code google.com}.
 *
 * @throws IllegalStateException if the domain has no parent, as determined
 *     by {@link #hasParent}
 */
public InternetDomainName parent() {
  final boolean canAscend = hasParent();
  checkState(canAscend, "Domain '%s' has no parent", name);
  final int oneLevelUp = 1;
  return ancestor(oneLevelUp);
}
/**
 * Returns the ancestor of the current domain at the given number of levels
 * "higher" (rightward) in the subdomain list, i.e. this name with its
 * {@code levels} leftmost parts removed.
 *
 * <p>The number of levels must be non-negative and at most {@code N-1},
 * where {@code N} is the number of parts in the domain, so that at least one
 * part remains. {@link #parent()} depends on the upper bound being inclusive:
 * it calls {@code ancestor(1)} on names that may have only two parts. A
 * negative value, or one greater than {@code N}, is rejected by
 * {@link java.util.List#subList(int, int)} with an
 * {@link IndexOutOfBoundsException}.
 *
 * TODO: Reasonable candidate for addition to public API.
 */
private InternetDomainName ancestor(int levels) {
  // Keep parts [levels, N) and re-parse them as a domain name of their own.
  return from(DOT_JOINER.join(parts.subList(levels, parts.size())));
}
/**
 * Builds a new {@code InternetDomainName} whose text is the argument, a dot,
 * and then the current name. For example, {@code
 * InternetDomainName.from("foo.com").child("www.bar")} yields a new
 * {@code InternetDomainName} with the value {@code www.bar.foo.com}. Only
 * lenient validation is performed, as described {@link #from(String) here}.
 *
 * @throws NullPointerException if leftParts is null
 * @throws IllegalArgumentException if the resulting name is not valid
 */
public InternetDomainName child(String leftParts) {
  // Null check happens before any concatenation, as in the one-line form.
  final String prefix = checkNotNull(leftParts);
  final String combined = prefix + "." + name;
  return from(combined);
}
/**
 * Returns an instance of {@link InternetDomainName} for the given text after
 * lenient validation. Specifically, validation against RFC 3490
 * ("Internationalizing Domain Names in Applications") is skipped.
 *
 * @param domain A domain name (not IP address)
 * @throws NullPointerException if {@code domain} is null
 * @throws IllegalArgumentException if {@code domain} is not syntactically
 *     valid according to {@link #isValid}
 * @since 10.0 (previously named {@code fromLenient})
 */
public static InternetDomainName from(String domain) {
  // Reject null up front; all remaining checks are left to the constructor.
  return new InternetDomainName(checkNotNull(domain));
}
/**
* Validation method used by {@link #from(String)} to ensure that the domain name is
* syntactically valid according to RFC 1035.
*
* @return Is the domain name syntactically valid?
*/
private static boolean validateSyntax(List {@code
* domainName = InternetDomainName.isValid(name)
* ? InternetDomainName.from(name)
* : DEFAULT_DOMAIN;}
*
* {@code
* try {
* domainName = InternetDomainName.from(name);
* } catch (IllegalArgumentException e) {
* domainName = DEFAULT_DOMAIN;
* }}
*
* @since 8.0 (previously named {@code isValidLenient})
*/
public static boolean isValid(String name) {
  // Validity is defined as "from(name) does not throw
  // IllegalArgumentException"; any other exception propagates to the caller.
  boolean valid;
  try {
    from(name);
    valid = true;
  } catch (IllegalArgumentException e) {
    valid = false;
  }
  return valid;
}
/**
 * Reports whether the domain name falls under one of the "wildcard"
 * patterns (e.g. {@code "*.ar"}): the text after the first dot must be a key
 * of {@code PublicSuffixPatterns.UNDER}.
 */
private static boolean matchesWildcardPublicSuffix(String domain) {
  // Limit 2 splits at the first dot only: [leftmost label, everything else].
  final String[] headAndRest = domain.split(DOT_REGEX, 2);
  if (headAndRest.length != 2) {
    // No dot at all, so there is nothing for a wildcard to stand in for.
    return false;
  }
  return PublicSuffixPatterns.UNDER.containsKey(headAndRest[1]);
}
/**
 * Returns this domain name as text, in its normalized all-lower-case form.
 */
@Override
public String toString() {
  return this.name;
}
/**
 * Compares by the text the caller supplied, once it has been normalized as
 * described in the class documentation. Consequently a non-ASCII Unicode
 * domain name and the Punycode spelling of the same domain name are
 * <em>not</em> considered equal.
 */
@Override
public boolean equals(@Nullable Object object) {
  if (this == object) {
    return true;
  }
  if (!(object instanceof InternetDomainName)) {
    // Also covers null, for which instanceof is always false.
    return false;
  }
  final InternetDomainName other = (InternetDomainName) object;
  return name.equals(other.name);
}
/**
 * Hashes the normalized name, the same value {@link #equals(Object)}
 * compares.
 */
@Override
public int hashCode() {
  final String normalized = name;
  return normalized.hashCode();
}
}