1/*
2 * Copyright (c) 2003 Constantin S. Svintsoff <kostik@iclub.nsu.ru>
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. The names of the authors may not be used to endorse or promote
13 *    products derived from this software without specific prior written
14 *    permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include "readlink.h"
30
31#include <string>
32
33#include <errno.h>
34#include <sys/param.h>
35#include <sys/stat.h>
36#include <unistd.h>
37
38/**
39 * This differs from realpath(3) mainly in its behavior when a path element does not exist or can
40 * not be searched. realpath(3) treats that as an error and gives up, but we have Java-compatible
41 * behavior where we just assume the path element was not a symbolic link. This leads to a textual
42 * treatment of ".." from that point in the path, which may actually lead us back to a path we
43 * can resolve (as in "/tmp/does-not-exist/../blah.txt" which would be an error for realpath(3)
44 * but "/tmp/blah.txt" under the traditional Java interpretation).
45 *
46 * This implementation also removes all the fixed-length buffers of the C original.
47 */
48bool canonicalize_path(const char* path, std::string& resolved) {
49    // 'path' must be an absolute path.
50    if (path[0] != '/') {
51        errno = EINVAL;
52        return false;
53    }
54
55    resolved = "/";
56    if (path[1] == '\0') {
57        return true;
58    }
59
60    // Iterate over path components in 'left'.
61    int symlinkCount = 0;
62    std::string left(path + 1);
63    while (!left.empty()) {
64        // Extract the next path component.
65        size_t nextSlash = left.find('/');
66        std::string nextPathComponent = left.substr(0, nextSlash);
67        if (nextSlash != std::string::npos) {
68            left.erase(0, nextSlash + 1);
69        } else {
70            left.clear();
71        }
72        if (nextPathComponent.empty()) {
73            continue;
74        } else if (nextPathComponent == ".") {
75            continue;
76        } else if (nextPathComponent == "..") {
77            // Strip the last path component except when we have single "/".
78            if (resolved.size() > 1) {
79                resolved.erase(resolved.rfind('/'));
80            }
81            continue;
82        }
83
84        // Append the next path component.
85        if (resolved[resolved.size() - 1] != '/') {
86            resolved += '/';
87        }
88        resolved += nextPathComponent;
89
90        // See if we've got a symbolic link, and resolve it if so.
91        struct stat sb;
92        if (lstat(resolved.c_str(), &sb) == 0 && S_ISLNK(sb.st_mode)) {
93            if (symlinkCount++ > MAXSYMLINKS) {
94                errno = ELOOP;
95                return false;
96            }
97
98            std::string symlink;
99            if (!readlink(resolved.c_str(), symlink)) {
100                return false;
101            }
102            if (symlink[0] == '/') {
103                // The symbolic link is absolute, so we need to start from scratch.
104                resolved = "/";
105            } else if (resolved.size() > 1) {
106                // The symbolic link is relative, so we just lose the last path component (which
107                // was the link).
108                resolved.erase(resolved.rfind('/'));
109            }
110
111            if (!left.empty()) {
112                const char* maybeSlash = (symlink[symlink.size() - 1] != '/') ? "/" : "";
113                left = symlink + maybeSlash + left;
114            } else {
115                left = symlink;
116            }
117        }
118    }
119
120    // Remove trailing slash except when the resolved pathname is a single "/".
121    if (resolved.size() > 1 && resolved[resolved.size() - 1] == '/') {
122        resolved.erase(resolved.size() - 1, 1);
123    }
124    return true;
125}
126