//
// Copyright 2016 Pixar
//
// Licensed under the terms set forth in the LICENSE.txt file available at
// https://openusd.org/license.
//
#ifndef PXR_BASE_TF_STRING_UTILS_H
#define PXR_BASE_TF_STRING_UTILS_H

/// \file tf/stringUtils.h
/// \ingroup group_tf_String
/// Definitions of basic string utilities in tf.

#include "pxr/pxr.h"

#include "pxr/base/arch/attributes.h"
#include "pxr/base/arch/hints.h"
#include "pxr/base/arch/inttypes.h"
#include "pxr/base/tf/api.h"
#include "pxr/base/tf/enum.h"

#include <cstdarg>
#include <cstring>
#include <list>
#include <set>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>

PXR_NAMESPACE_OPEN_SCOPE

class TfToken;

/// \addtogroup group_tf_String
///@{

/// Returns a string formed by a printf()-like specification.
///
/// \c TfStringPrintf() is a memory-safe way of forming a string using
/// printf()-like formatting.  For example,
/// \code
///  string formatMsg(const string& caller, int i, double val[])
///  {
///     return TfStringPrintf("%s: val[%d] = %g\n", caller.c_str(), i, val[i]);
///  }
/// \endcode
///
/// The function is safe only to the extent that the arguments match the
/// formatting string.  In particular, be careful to pass strings themselves
/// into \c TfStringPrintf() as in the above example (i.e. \c caller.c_str()
/// as opposed to just passing \c caller).
///
/// \note \c TfStringPrintf() is just a wrapper for \c ArchStringPrintf().
TF_API
std::string TfStringPrintf(const char *fmt, ...)
#ifndef doxygen
    ARCH_PRINTF_FUNCTION(1, 2)
#endif /* doxygen */
    ;

/// Returns a string formed by a printf()-like specification.
///
/// \c TfVStringPrintf() is equivalent to \c TfStringPrintf() except that it
/// is called with a \c va_list instead of a variable number of arguments. \c
/// TfVStringPrintf() does not call the \c va_end macro. Consequently, the
/// value of \c ap is undefined after the call. A function that calls \c
/// TfVStringPrintf() should call \c va_end(ap) itself afterwards.
///
/// \note \c TfVStringPrintf() is just a wrapper for \c ArchVStringPrintf().
TF_API
std::string TfVStringPrintf(const std::string& fmt, va_list ap);

/// Bloat-avoidance version of TfVStringPrintf()

TF_API
std::string TfVStringPrintf(const char *fmt, va_list ap)
#ifndef doxygen
    ARCH_PRINTF_FUNCTION(1, 0)
#endif /* doxygen */
    ;

/// Build a std::string from a C string that is allowed to be NULL.
///
/// A NULL \p ptr yields the empty string; any other pointer is copied up to
/// its terminating NUL character.
inline std::string TfSafeString(const char* ptr) {
    if (!ptr) {
        return std::string();
    }
    return std::string(ptr);
}

/// Returns the given integer as a string.
///
/// Produces the decimal representation of \p i (with a leading '-' for
/// negative values), i.e. the same text printf's "%d" conversion yields.
/// The conversion uses std::to_string() rather than a varargs printf-style
/// formatter, so no format string has to be parsed at run time.
inline std::string TfIntToString(int i) {
    return std::to_string(i);
}

/// Converts text string to double
///
/// This method converts strings to floating point numbers.  It is similar to
/// libc's atof(), but performs the conversion much more quickly.
///
/// It expects somewhat valid input: it will continue parsing the input until
/// it hits an unrecognized character, as described by the regexp below, and
/// at that point will return the results up to that point.
///
///  (-?[0-9]+(\.[0-9]*)?|-?\.[0-9]+)([eE][-+]?[0-9]+)?
///
/// It will not check to see if there is any input at all, or whitespace
/// after the digits.  I.e.:
///    TfStringToDouble("") == 0.0
///    TfStringToDouble("blah") == 0.0
///    TfStringToDouble("-") == -0.0
///    TfStringToDouble("1.2foo") == 1.2
///
/// \note \c TfStringToDouble is a wrapper around the extern-c TfStringToDouble
TF_API double TfStringToDouble(const std::string& txt);

/// \overload
TF_API double TfStringToDouble(const char *text);

/// \overload
TF_API double TfStringToDouble(const char *text, int len);

/// Convert a sequence of digits in \p txt to a long int value.  Caller is
/// responsible for ensuring that \p txt has content matching:
///
/// \code
/// -?[0-9]+
/// \endcode
///
/// If the digit sequence's value is out of range, set \p *outOfRange to true
/// (if \p outOfRange is not NULL) and return either
/// std::numeric_limits<long>::min() or max(), whichever is closest to the
/// true value.
TF_API
long TfStringToLong(const std::string &txt, bool *outOfRange=NULL);

/// \overload

TF_API
long TfStringToLong(const char *txt, bool *outOfRange=NULL);

/// Convert a sequence of digits in \p txt to an unsigned long value.  Caller
/// is responsible for ensuring that \p txt has content matching:
///
/// \code
/// [0-9]+
/// \endcode
///
/// If the digit sequence's value is out of range, set \p *outOfRange to true
/// (if \p outOfRange is not NULL) and return std::numeric_limits<unsigned
/// long>::max().
TF_API
unsigned long TfStringToULong(const std::string &txt, bool *outOfRange=NULL);

/// \overload

TF_API
unsigned long TfStringToULong(const char *txt, bool *outOfRange=NULL);

/// Convert a sequence of digits in \p txt to an int64_t value.  Caller must
/// ensure that \p txt has content matching:
///
/// \code
/// -?[0-9]+
/// \endcode
///
/// If the digit sequence's value is out of range, set \p *outOfRange to true
/// (if \p outOfRange is not NULL) and return either
/// std::numeric_limits<int64_t>::min() or max(), whichever is closest to the
/// true value.
TF_API
int64_t TfStringToInt64(const std::string &txt, bool *outOfRange=NULL);

/// \overload
TF_API
int64_t TfStringToInt64(const char *txt, bool *outOfRange=NULL);

/// Convert a sequence of digits in \p txt to a uint64_t value.  Caller is
/// responsible for ensuring that \p txt has content matching:
///
/// \code
/// [0-9]+
/// \endcode
///
/// If the digit sequence's value is out of range, set \p *outOfRange to true
/// (if \p outOfRange is not NULL) and return
/// std::numeric_limits<uint64_t>::max().
TF_API
uint64_t TfStringToUInt64(const std::string &txt, bool *outOfRange=NULL);

/// \overload
TF_API
uint64_t TfStringToUInt64(const char *txt, bool *outOfRange=NULL);

// Shared implementation for the TfStringStartsWith() overloads.
//
// Reports whether the first prelen characters of s equal those of prefix.
// slen and prelen are the caller-supplied lengths of s and prefix; a prefix
// longer than s can never match.  The comparison is done with strncmp(), so
// it also stops at the first NUL character.
inline bool
Tf_StringStartsWithImpl(char const *s, size_t slen,
                        char const *prefix, size_t prelen)
{
    if (slen < prelen) {
        return false;
    }
    return strncmp(s, prefix, prelen) == 0;
}

/// Returns true if \p s starts with \p prefix.
///
/// \p prefix must be a non-NULL, NUL-terminated C string; its length is
/// measured with strlen().
inline bool
TfStringStartsWith(const std::string& s, const char *prefix)
{
    const size_t prefixLen = strlen(prefix);
    return Tf_StringStartsWithImpl(s.c_str(), s.length(), prefix, prefixLen);
}

/// \overload
///
/// Forwards to the C-string overload using \p prefix's c_str().
inline bool
TfStringStartsWith(const std::string& s, const std::string& prefix) {
    const char *prefixChars = prefix.c_str();
    return TfStringStartsWith(s, prefixChars);
}

// Shared implementation for the TfStringEndsWith() overloads.
//
// Reports whether the last suflen characters of s equal suffix.  slen and
// suflen are the caller-supplied lengths of s and suffix; a suffix longer
// than s can never match.  The tail of s is compared with strcmp(), so both
// strings must be NUL-terminated.
inline bool
Tf_StringEndsWithImpl(char const *s, size_t slen,
                      char const *suffix, size_t suflen)
{
    if (slen < suflen) {
        return false;
    }
    char const *tail = s + (slen - suflen);
    return strcmp(tail, suffix) == 0;
}

/// Returns true if \p s ends with \p suffix.
///
/// \p suffix must be a non-NULL, NUL-terminated C string; its length is
/// measured with strlen().
inline bool TfStringEndsWith(const std::string& s, const char *suffix)
{
    const size_t suffixLen = strlen(suffix);
    return Tf_StringEndsWithImpl(s.c_str(), s.length(), suffix, suffixLen);
}

/// \overload
///
/// Forwards to the C-string overload using \p suffix's c_str().
inline bool
TfStringEndsWith(const std::string& s, const std::string& suffix)
{
    const char *suffixChars = suffix.c_str();
    return TfStringEndsWith(s, suffixChars);
}

/// Returns true if \p s contains \p substring.
// \ingroup group_tf_String
TF_API
bool TfStringContains(const std::string& s, const char *substring);

/// \overload
///
/// Forwards to the C-string overload using \p substring's c_str().
inline bool
TfStringContains(const std::string &s, const std::string &substring) {
    const char *needle = substring.c_str();
    return TfStringContains(s, needle);
}

/// \overload
TF_API
bool TfStringContains(const std::string &s, const TfToken& substring);

/// Makes all characters in \p source lowercase, and returns the result.
TF_API
std::string TfStringToLower(const std::string& source);

/// Makes all characters in \p source uppercase, and returns the result.
TF_API
std::string TfStringToUpper(const std::string& source);

/// Returns a copy of the \p source string with only its first character
/// capitalized. This emulates the behavior of Python's \c str.capitalize().
TF_API
std::string TfStringCapitalize(const std::string& source);

/// Locale-independent case folding of [A-Z] for ASCII or UTF-8 encoded
/// \p source strings
///
/// This can be used for case insensitive matching where one of the strings
/// being compared is either known to be ASCII only by specification (like a
/// URI scheme or an explicit token) or where the specification explicitly
/// notes that only [A-Z] will be matched case insensitively.
///
/// \code
/// TfStringEndsWith(TfStringToLowerAscii("ü.JPG"), ".jpg")
/// \endcode
TF_API
std::string TfStringToLowerAscii(const std::string& source);

/// Trims characters (by default, whitespace) from the left.
///
/// Characters from the beginning of \p s are removed until a character not in
/// \p trimChars is found; the result is returned.
TF_API
std::string TfStringTrimLeft(const std::string& s,
                             const char* trimChars = " \n\t\r");

/// Trims characters (by default, whitespace) from the right.
///
/// Characters at the end of \p s are removed until a character not in \p
/// trimChars is found; the result is returned.
TF_API
std::string TfStringTrimRight(const std::string& s,
                              const char* trimChars = " \n\t\r");

/// Trims characters (by default, whitespace) from the beginning and end of
/// string.
///
/// Characters at the beginning and end of \p s are removed until a character
/// not in \p trimChars is found; the result is returned.
TF_API
std::string TfStringTrim(const std::string& s,
                         const char* trimChars = " \n\t\r");

/// Returns the common prefix of the input strings, if any.
///
/// Copies of the input strings are compared.  Returns a new string which is
/// the longest prefix common to both input strings.  If the strings have no
/// common prefix, an empty string is returned.
TF_API
std::string TfStringGetCommonPrefix(std::string a, std::string b);

/// Returns the suffix of a string
///
/// Returns characters after the final character \c delimiter (default ".") of
/// a string.  Thus suffix of "abc.def" is "def" using "." as the delimiter.
/// If the delimiter does not occur, the empty string is returned.
TF_API
std::string TfStringGetSuffix(const std::string& name, char delimiter = '.');

/// Returns everything up to the suffix of a string
///
/// Returns characters before the final character \c delimiter (default ".")
/// of a string.  Thus not-suffix of "abc.def" is "abc" using "." as the
/// delimiter.  If the delimiter does not occur, the original string is
/// returned.
TF_API
std::string TfStringGetBeforeSuffix(const std::string& name, char delimiter = '.');

/// Returns the base name of a file (final component of the path).
TF_API
std::string TfGetBaseName(const std::string& fileName);

/// Returns the path component of a file (complement of TfGetBaseName()).
///
/// The returned string ends in a '/' (or possibly a '\' on Windows), unless
/// none was found in \c fileName, in which case the empty string is returned.
/// In particular, \c TfGetPathName(s)+TfGetBaseName(s) == \c s for any string
/// \c s (as long as \c s doesn't end with multiple adjacent slashes, which is
/// illegal).
TF_API
std::string TfGetPathName(const std::string& fileName);

/// Replaces all occurrences of string \p from with \p to in \p source
///
/// Returns a new string which is created by copying \p source and replacing
/// every occurrence of \p from with \p to. Correctly handles the case in which
/// \p to contains \p from.
TF_API
std::string TfStringReplace(const std::string& source, const std::string& from,
                            const std::string& to);

/// Concatenates the strings in the range [\p begin, \p end).
///
/// Returns the elements of the range joined together, with \p separator (by
/// default, a single space) inserted between each successive pair.  An empty
/// range yields the empty string and a single-element range yields that
/// element unchanged.  \p separator must be a non-NULL, NUL-terminated
/// C string.
template <class ForwardIterator>
std::string TfStringJoin(
    ForwardIterator begin, ForwardIterator end,
    const char* separator = " ")
{
    if (begin == end) {
        return std::string();
    }

    const size_t count = std::distance(begin, end);
    if (count == 1) {
        return *begin;
    }

    // First pass: measure the pieces so the result is allocated only once.
    size_t payload = 0;
    for (ForwardIterator it = begin; it != end; ++it) {
        payload += it->size();
    }

    std::string joined;
    joined.reserve(payload + (count - 1) * strlen(separator));

    // Second pass: the first element goes in bare, every later element is
    // preceded by the separator.
    ForwardIterator it = begin;
    joined.append(*it);
    for (++it; it != end; ++it) {
        joined.append(separator);
        joined.append(*it);
    }

    return joined;
}

/// Concatenates \p strings, with default separator.
///
/// Returns the concatenation of the strings in \p strings, with \p separator
/// (by default, a space) added between each successive pair of strings.
TF_API
std::string TfStringJoin(const std::vector<std::string>& strings,
                         const char* separator = " ");

/// Concatenates \p strings, with default separator.
///
/// Returns the concatenation of the strings in \p strings, with \p separator
/// (by default, a space) added between each successive pair of strings.
TF_API
std::string TfStringJoin(const std::set<std::string>& strings,
                         const char* separator = " ");

/// Breaks the given string apart, returning a vector of strings.
///
/// The string \p src is broken apart into individual words, where a word
/// is delimited by the string \p separator. This function behaves like
/// Python's string split method.
TF_API
std::vector<std::string> TfStringSplit(std::string const &src,
                                       std::string const &separator);

/// Breaks the given string apart, returning a vector of strings.
///
/// The string \p source is broken apart into individual words, where a word
/// is delimited by the characters in \p delimiters.  Delimiters default to
/// white space (space, tab, and newline).
///
/// No empty strings are returned: delimiters at the start or end are ignored,
/// consecutive delimiters are treated as though they were one, and an empty
/// input will result in an empty return vector.
TF_API
std::vector<std::string> TfStringTokenize(const std::string& source,
                                          const char* delimiters = " \t\n");

/// Breaks the given string apart, returning a set of strings.
///
/// Same as TfStringTokenize, except this one returns a set.
TF_API
std::set<std::string> TfStringTokenizeToSet(const std::string& source,
                                            const char* delimiters = " \t\n");

/// Breaks the given quoted string apart, returning a vector of strings.
///
/// The string \p source is broken apart into individual words, where a word
/// is delimited by the characters in \p delimiters.  This function is similar
/// to \c TfStringTokenize, except it considers a quoted string as a single
/// word. The function will preserve quotes that are nested within other
/// quotes or are preceded by a backslash character. \p errors, if provided,
/// contains any error messages. Delimiters default to white space (space,
/// tab, and newline).
TF_API
std::vector<std::string> 
TfQuotedStringTokenize(const std::string& source, 
                       const char* delimiters = " \t\n", 
                       std::string *errors = NULL);

/// Breaks the given string apart by matching delimiters.
///
/// The string \p source is broken apart into individual words, where a word
/// begins with \p openDelimiter and ends with a matching \p closeDelimiter.
/// Any delimiters within the matching delimiters become part of the word, and
/// anything outside matching delimiters gets dropped. For example, \c
/// TfMatchedStringTokenize("{a} string {to {be} split}", '{', '}') would
/// return a vector containing "a" and "to {be} split". \p openDelimiter and
/// \p closeDelimiter cannot be the same. \p errors, if provided, contains any
/// error messages.
TF_API
std::vector<std::string> 
TfMatchedStringTokenize(const std::string& source, 
                        char openDelimiter, 
                        char closeDelimiter, 
                        char escapeCharacter = '\0',
                        std::string *errors = NULL);

/// Convenience form of \c TfMatchedStringTokenize that takes \p errors but
/// no \c escapeCharacter.  It forwards to the full version with the escape
/// character set to '\0', which lets callers pass \p errors without also
/// having to spell out an escape character.
///
/// \overload
inline
std::vector<std::string> 
TfMatchedStringTokenize(const std::string& source, 
                        char openDelimiter, 
                        char closeDelimiter, 
                        std::string *errors)
{
    // '\0' is the default escapeCharacter of the full overload.
    const char noEscapeCharacter = '\0';
    return TfMatchedStringTokenize(
        source, openDelimiter, closeDelimiter, noEscapeCharacter, errors);
}

/// \class TfDictionaryLessThan
///
/// Provides dictionary ordering binary predicate function on strings.
///
/// The \c TfDictionaryLessThan class is a functor as defined by the STL
/// standard.  It compares strings using "dictionary" order: for example, the
/// following strings are in dictionary order:
/// ["abacus", "Albert", "albert", "baby", "Bert", "file01", "file001", "file2",
/// "file10"]
///
/// Note that capitalization matters only if the strings differ by
/// capitalization alone.
///
/// Characters whose ASCII values are in between upper- and lowercase letters,
/// such as underscore, are sorted to come before all letters.
///
/// \note This comparison is used for the runtime to give a deterministic
/// ordering to strings.
///
/// ASCII strings will sort lexicographically according to the rules above.
/// Strings with other Unicode characters will follow these same rules until a
/// multi-byte codepoint is encountered in which case it will be byte compared
/// with the bytes in the other string.  Multi-byte encoded characters will
/// operate this way for each of the bytes.
///
/// Note that this results in a non-lexicographic ordering of strings that
/// contain non-ASCII characters.  Clients interested in sorting strings
/// lexicographically should not rely on this function for doing so and should
/// instead use a custom sort function (or use one provided by an already
/// existing library such as Qt or ICU).
struct TfDictionaryLessThan {
    /// Return true if \p lhs is less than \p rhs in dictionary order.
    ///
    /// The usual role of this functor is as the ordering predicate of an STL
    /// container, for example:
    /// \code
    ///   map<string, DataType, TfDictionaryLessThan>  table;
    /// \endcode
    ///
    /// To compare just two strings, invoke a temporary directly:
    /// \code
    ///     bool aIsFirst = TfDictionaryLessThan()(aString, bString);
    /// \endcode
    inline bool operator()(const std::string &lhs,
                           const std::string &rhs) const {
        // Fast path: decide on the leading characters alone.  Most of the
        // time these are two different ASCII letters, so differing case,
        // embedded numbers and UTF-8 rarely need to be considered.
        const unsigned char lhsLead = lhs.c_str()[0];
        const unsigned char rhsLead = rhs.c_str()[0];
        const bool anyNonAscii = lhsLead >= 0x80 || rhsLead >= 0x80;
        const bool sameIgnoringCase = (lhsLead & ~0x20) == (rhsLead & ~0x20);
        const bool outsideLetterZone = (lhsLead < 0x40) || (rhsLead < 0x40);
        if (ARCH_LIKELY(
                !(anyNonAscii || sameIgnoringCase || outsideLetterZone))) {
            // Adding 5 modulo 32 folds upper- and lowercase onto the same
            // rank (6..31 for a..z) while '_' lands on 4, so '_' sorts less
            // than all letters, which preserves existing behavior.
            const int lhsRank = (lhsLead + 5) & 31;
            const int rhsRank = (rhsLead + 5) & 31;
            return lhsRank < rhsRank;
        }
        // Everything else is decided by the out-of-line comparison.
        return _LessImpl(lhs, rhs);
    }
private:
    // General comparison used whenever the fast path above does not apply.
    TF_API bool _LessImpl(const std::string &lhs,
                          const std::string &rhs) const;
};

/// Convert an arbitrary type into a string
///
/// Enum types are converted through TfEnum::GetName().  Every other type is
/// written to a string stream with its stream output operator and the
/// resulting text is returned.  You are free to use the stream operators in
/// ostreamMethods.h, but are not required to do so.
template <typename T>
std::string
TfStringify(const T& v)
{
    if constexpr (!std::is_enum<T>::value) {
        std::ostringstream out;
        out << v;
        return out.str();
    }
    else {
        return TfEnum::GetName(v);
    }
}

/// \overload
TF_API std::string TfStringify(bool v);
/// \overload
TF_API std::string TfStringify(std::string const&);
/// \overload
TF_API std::string TfStringify(float);
/// \overload
TF_API std::string TfStringify(double);

/// Writes the string representation of \c d to \c buffer of length \c len. 
/// If \c emitTrailingZero is true, the string representation will end with .0 
/// in the case where d is an integer otherwise it will be omitted.
/// The buffer length must be at least 25 in order to ensure that all double
/// values can be represented.
/// Returns whether the conversion was successful.
TF_API bool TfDoubleToString(
    double d, char* buffer, int len, bool emitTrailingZero);

/// \struct TfStreamFloat
///
/// Wrapper around a single float, used to request streaming in a canonical
/// format that can safely roundtrip with the minimal number of digits.
struct TfStreamFloat {
    /// The wrapped value.
    float value;

    /// Wrap \p f.  Explicit, so a float never converts implicitly.
    explicit TfStreamFloat(float f)
        : value(f)
    {
    }
};

TF_API std::ostream& operator<<(std::ostream& o, TfStreamFloat t);

/// \struct TfStreamDouble
///
/// Wrapper around a single double, used to request streaming in a canonical
/// format that can safely roundtrip with the minimal number of digits.
struct TfStreamDouble {
    /// The wrapped value.
    double value;

    /// Wrap \p d.  Explicit, so a double never converts implicitly.
    explicit TfStreamDouble(double d)
        : value(d)
    {
    }
};

TF_API std::ostream& operator<<(std::ostream& o, TfStreamDouble t);

/// Convert a string to an arbitrary type
///
/// A value-initialized \c T is filled in from \p instring using the type's
/// stream input operator and then returned.  If the extraction fails and
/// \p status is non-NULL, \p *status is set to \c false; on success
/// \p *status is left untouched, so callers should initialize it to \c true.
template <typename T>
T
TfUnstringify(const std::string &instring, bool* status = NULL)
{
    std::istringstream parser(instring);
    T result = T();
    parser >> result;
    if (status != NULL && parser.fail()) {
        *status = false;
    }
    return result;
}

/// \overload
template <>
TF_API 
bool TfUnstringify(const std::string &instring, bool* status);
/// \overload
template <>
TF_API 
std::string TfUnstringify(const std::string &instring, bool* status);

/// Returns a string with glob characters converted to their regular
/// expression equivalents.
///
/// Currently, this transforms strings by replacing all instances of '.' with
/// '\.', '*' with '.*', and '?' with '.', in that order.
TF_API 
std::string TfStringGlobToRegex(const std::string& s);

/// Process escape sequences in ANSI C string constants.
///
/// The following escape sequences are accepted:
///
/// \li `\\`:    backslash
/// \li `\a`:     ring the bell
/// \li `\b`:     backspace
/// \li `\f`:     form feed
/// \li `\n`:     new line
/// \li `\r`:     carriage return
/// \li `\t`:     tab
/// \li `\v`:     vertical tab
/// \li `\xdd`:   hex constant
/// \li `\ddd`:   octal constant
///
/// So, if the two-character sequence `\n` appears in the string, it is
/// replaced by an actual newline.  Each hex and octal constant translates into
/// one character in the output string.  Hex constants can be up to 2 digits,
/// octal constants can be up to 3 digits.  Both are terminated by a character
/// that is not a valid constant.  Note that it is good practice to encode hex
/// and octal constants with maximum width (2 and 3 digits, respectively) using
/// leading zeroes if necessary.  This avoids problems where characters after
/// the hex/octal constant that shouldn't be part of the constant get
/// interpreted as part of it.  For example, the sequence `\x2defaced` will
/// produce the characters "-efaced" when what was probably intended was the
/// character 0x02 (STX) followed by "defaced".
///
/// Illegal escape sequences are replaced by the character following the
/// backslash, so the two character sequence `\c` would become "c".  Processing
/// continues until the input hits a NUL character in the input string -
/// anything appearing after the NUL will be ignored.
TF_API std::string TfEscapeString(const std::string &in);
TF_API void TfEscapeStringReplaceChar(const char** in, char** out);

/// Concatenate two strings containing '/' and '..' tokens like a file path or
/// scope name.
///
/// Tokenize the input strings using a '/' delimiter. Look for '..' tokens in
/// the suffix and construct the appropriate result.
///
/// Examples:
/// 
/// \li TfStringCatPaths( "foo/bar", "jive" ) => "foo/bar/jive"
/// \li TfStringCatPaths( "foo/bar", "../jive" ) => "foo/jive"
TF_API
std::string TfStringCatPaths( const std::string &prefix, 
                              const std::string &suffix );

/// Test whether \a identifier is valid.
///
/// An identifier is valid if it follows the C/Python identifier convention;
/// that is, it must be at least one character long, must start with a letter
/// or underscore, and must contain only letters, underscores, and numerals.
///
/// Only the ASCII letters [A-Za-z] and digits [0-9] are accepted.  Every
/// byte of \a identifier is examined, so a string with an embedded NUL
/// character is rejected rather than being judged on the part before it.
inline bool
TfIsValidIdentifier(std::string const &identifier)
{
    // The unsigned subtraction wraps for characters below the range start,
    // so each range test needs only a single comparison.
    auto letter = [](unsigned c) { return ((c-'A') < 26) || ((c-'a') < 26); };
    auto number = [](unsigned c) { return (c-'0') < 10; };
    auto under = [](unsigned c) { return c == '_'; };

    // Must be non-empty and must not begin with a numeral.
    if (identifier.empty() || number(identifier[0])) {
        return false;
    }

    // Iterate over the string's full length instead of stopping at the
    // first NUL of c_str().
    for (const char ch : identifier) {
        const unsigned x = ch;
        if (!(letter(x) || number(x) || under(x))) {
            return false;
        }
    }
    return true;
}

/// Produce a valid identifier (see TfIsValidIdentifier) from \p in by
/// replacing invalid characters with '_'.  If \p in is empty, return "_".
TF_API
std::string
TfMakeValidIdentifier(const std::string &in);

/// Escapes characters in \a in so that they are valid XML.
///
/// Returns the name with special characters (&, <, >, ", ') replaced with the
/// corresponding escape sequences.
TF_API
std::string TfGetXmlEscapedString(const std::string &in);

///@}

PXR_NAMESPACE_CLOSE_SCOPE

#endif // PXR_BASE_TF_STRING_UTILS_H 
