public final class Token
extends java.lang.Object
This class provides convenience operations for handling 'Tokens'. Tokens are UTF-8 encoded strings, stored in a byte array.
Note that, to guarantee a consistent string representation, all string conversions should be done via the methods of this class.
| Modifier and Type | Field and Description |
|---|---|
static byte[] |
COLON
Colon.
|
static java.util.Comparator<byte[]> |
COMP
Comparator for byte arrays.
|
static byte[] |
EMPTY
Empty token.
|
static byte[] |
FALSE
Token 'false'.
|
static byte[] |
HEX
Hex codes.
|
static byte[] |
INF
Token 'INF'.
|
static java.util.Comparator<byte[]> |
LC_COMP
Case-insensitive comparator for byte arrays.
|
static byte[] |
NINF
Token '-INF'.
|
static byte[] |
NULL
Token 'null'.
|
static byte[] |
ONE
Digit '1'.
|
static byte[] |
SLASH
Slash.
|
static byte[] |
SPACE
Space.
|
static byte[] |
TRUE
Token 'true'.
|
static java.lang.String |
UTF16
UTF16 encoding string.
|
static java.lang.String |
UTF162
UTF16 encoding string (variant).
|
static java.lang.String |
UTF16BE
UTF16BE (=UTF16) encoding string.
|
static java.lang.String |
UTF16LE
UTF16LE encoding string.
|
static java.lang.String |
UTF32
UTF32 encoding string.
|
static java.lang.String |
UTF322
UTF32 encoding string (variant).
|
static java.lang.String |
UTF8
UTF8 encoding string.
|
static java.lang.String |
UTF82
UTF8 encoding string (variant).
|
static byte[] |
XML
XML token.
|
static byte[] |
XMLC
XML token with colon.
|
static byte[] |
XMLNS
XMLNS token.
|
static byte[] |
XMLNSC
XMLNS token with colon.
|
static byte[] |
ZERO
Digit '0'.
|
| Modifier and Type | Method and Description |
|---|---|
static boolean |
ascii(byte[] token)
Checks if the specified token only consists of ASCII characters.
|
static byte[] |
chop(byte[] token,
int max)
Chops a token to the specified length and adds dots.
|
static byte[] |
chopNumber(byte[] token)
Finishes the numeric token, removing trailing zeroes.
|
static int |
cl(byte cp)
Returns the length of the specified UTF8 byte.
|
static int |
cl(byte[] token,
int pos)
Returns the length of a UTF8 character at the specified position.
|
static byte[] |
concat(byte[] token1,
byte[] token2)
Concatenates two tokens.
|
static byte[] |
concat(byte[] token1,
byte[] token2,
byte[] token3)
Concatenates three tokens.
|
static boolean |
contains(byte[] token,
byte[] sub)
Checks if the first token contains the second token.
|
static boolean |
contains(byte[] token,
int c)
Checks if the first token contains the specified character.
|
static int |
cp(byte[] token,
int pos)
Returns the codepoint (unicode value) of the specified token, starting at
the specified position.
|
static int[] |
cps(byte[] token)
Converts a token to a sequence of codepoints.
|
static byte[] |
delete(byte[] token,
int ch)
Deletes the specified character from the token.
|
static int |
diff(byte[] token,
byte[] compare)
Calculates the difference of two tokens.
|
static int |
diff(byte char1,
byte char2)
Calculates the difference of two characters.
|
static boolean |
digit(int ch)
Checks if the specified character is a digit (0 - 9).
|
static boolean |
endsWith(byte[] token,
byte[] sub)
Checks if the first token ends with the second token.
|
static boolean |
endsWith(byte[] token,
int ch)
Checks if the first token ends with the specified character.
|
static boolean |
eq(byte[] token,
byte[]... tokens)
Compares several tokens for equality.
|
static boolean |
eq(byte[] token1,
byte[] token2)
Compares two tokens for equality.
|
static boolean |
eq(java.lang.String str,
java.lang.String... strings)
Compares several strings for equality.
|
static boolean |
eqic(java.lang.String str,
java.lang.String... strings)
Compares several strings for equality, ignoring the case.
|
static byte[] |
escape(byte[] token)
Escapes the specified token.
|
static boolean |
ftChar(int ch)
Returns true if the specified character is a full-text letter or digit.
|
static int |
hash(byte[] token)
Calculates a hash code for the specified token.
|
static byte[] |
hex(byte[] val,
boolean uc)
Returns a hex representation of the specified byte array.
|
static int |
indexOf(byte[] token,
byte[] sub)
Returns the position of the specified token or -1.
|
static int |
indexOf(byte[] token,
byte[] sub,
int pos)
Returns the position of the specified token or -1.
|
static int |
indexOf(byte[] token,
int c)
Returns the position of the specified character or -1.
|
static int |
lastIndexOf(byte[] token,
int c)
Returns the last position of the specified character or -1.
|
static byte[] |
lc(byte[] token)
Converts the specified token to lower case.
|
static int |
lc(int ch)
Converts a character to lower case.
|
static int |
len(byte[] token)
Returns the token length.
|
static boolean |
letter(int ch)
Checks if the specified character is a computer letter (A - Z, a - z, _).
|
static boolean |
letterOrDigit(int ch)
Checks if the specified character is a computer letter or digit.
|
static byte[] |
local(byte[] name)
Returns the local name of the specified name.
|
static java.lang.String |
md5(java.lang.String string)
Returns an MD5 hash.
|
static byte[] |
norm(byte[] token)
Normalizes all whitespace occurrences in the specified token.
|
static int |
norm(int ch)
Returns a normalized character without diacritics.
|
static java.lang.String |
normEncoding(java.lang.String encoding,
java.lang.String old)
Returns a unified representation of the specified encoding.
|
static int |
numDigits(int integer)
Returns the number of digits of the specified integer.
|
static byte[] |
prefix(byte[] name)
Returns the prefix of the specified token.
|
static byte[] |
replace(byte[] token,
int search,
int replace)
Replaces the specified character and returns the result token.
|
static byte[][] |
split(byte[] token,
int sep)
Splits the token at all occurrences of the specified separation character and returns an array with all tokens.
|
static boolean |
startsWith(byte[] token,
byte[] sub)
Checks if the first token starts with the second token.
|
static boolean |
startsWith(byte[] token,
int ch)
Checks if the first token starts with the specified character.
|
static java.lang.String |
string(byte[] token)
Returns the specified token as string.
|
static java.lang.String |
string(byte[] token,
int start,
int length)
Returns the specified token as string.
|
static byte[] |
substring(byte[] token,
int start)
Returns a substring of the specified token.
|
static byte[] |
substring(byte[] token,
int start,
int end)
Returns a substring of the specified token.
|
static byte[] |
subtoken(byte[] token,
int start)
Returns a partial token.
|
static byte[] |
subtoken(byte[] token,
int start,
int end)
Returns a partial token.
|
static boolean |
supported(java.lang.String encoding)
Checks if the specified encoding is supported.
|
static double |
toDouble(byte[] token)
Converts the specified token into a double value.
|
static int |
toInt(byte[] token)
Converts the specified token into an integer value.
|
static int |
toInt(byte[] token,
int start,
int end)
Converts the specified token into an integer value.
|
static int |
toInt(java.lang.String string)
Converts the specified string into an integer value.
|
static byte[] |
token(boolean bool)
Creates a byte array representation of the specified boolean value.
|
static byte[] |
token(double dbl)
Creates a byte array representation from the specified double value;
inspired by Xavier Franc's Qizx.
|
static byte[] |
token(float flt)
Creates a byte array representation from the specified float value.
|
static byte[] |
token(int integer)
Creates a byte array representation of the specified integer value.
|
static byte[] |
token(long integer)
Creates a byte array representation from the specified long value,
using Java's standard method.
|
static byte[] |
token(java.lang.String string)
Converts a string to a byte array.
|
static byte[][] |
tokens(java.lang.String... strings)
Converts the specified strings to tokens.
|
static long |
toLong(byte[] token)
Converts the specified token into a long value.
|
static long |
toLong(byte[] token,
int start,
int end)
Converts the specified token into a long value.
|
static long |
toLong(java.lang.String string)
Converts the specified string into a long value.
|
static int |
toSimpleInt(byte[] token)
Converts the specified token into a positive integer value.
|
static byte[] |
trim(byte[] token)
Removes leading and trailing whitespaces from the specified token.
|
static byte[] |
uc(byte[] token)
Converts the specified token to upper case.
|
static int |
uc(int ch)
Converts a character to upper case.
|
static byte[] |
uri(byte[] token,
boolean iri)
Returns a URI encoded token.
|
static byte[] |
utf8(byte[] token,
java.lang.String encoding)
Converts a token from the input encoding to UTF8.
|
static boolean |
ws(byte[] token)
Checks if the specified token has only whitespaces.
|
static boolean |
ws(int ch)
Checks if the specified character is a whitespace.
|
public static final byte[] EMPTY
public static final byte[] XML
public static final byte[] XMLC
public static final byte[] XMLNS
public static final byte[] XMLNSC
public static final byte[] TRUE
public static final byte[] FALSE
public static final byte[] NULL
public static final byte[] INF
public static final byte[] NINF
public static final byte[] SPACE
public static final byte[] ZERO
public static final byte[] ONE
public static final byte[] SLASH
public static final byte[] COLON
public static final byte[] HEX
public static final java.lang.String UTF8
public static final java.lang.String UTF82
public static final java.lang.String UTF16
public static final java.lang.String UTF162
public static final java.lang.String UTF16BE
public static final java.lang.String UTF16LE
public static final java.lang.String UTF32
public static final java.lang.String UTF322
public static final java.util.Comparator<byte[]> COMP
public static final java.util.Comparator<byte[]> LC_COMP
public static java.lang.String string(byte[] token)
token - tokenpublic static java.lang.String string(byte[] token,
int start,
int length)
token - token
start - start position
length - length
public static boolean ascii(byte[] token)
token - tokenpublic static byte[] token(java.lang.String string)
string - string to be convertedpublic static byte[][] tokens(java.lang.String... strings)
strings - stringspublic static byte[] utf8(byte[] token,
java.lang.String encoding)
token - token to be convertedencoding - input encodingpublic static java.lang.String normEncoding(java.lang.String encoding,
java.lang.String old)
encoding - input encoding (UTF-8 is returned for a null reference)
old - previous encoding (optional)
public static boolean supported(java.lang.String encoding)
encoding - encodingpublic static int cp(byte[] token,
int pos)
token - tokenpos - character positionpublic static int cl(byte cp)
cp - codepointpublic static int cl(byte[] token,
int pos)
token - tokenpos - positionpublic static int[] cps(byte[] token)
token - tokenpublic static int len(byte[] token)
token - tokenpublic static byte[] token(boolean bool)
bool - boolean value to be convertedpublic static byte[] token(int integer)
integer - int value to be convertedpublic static int numDigits(int integer)
integer - number to be checkedpublic static byte[] token(long integer)
integer - long value to be converted
public static byte[] token(double dbl)
dbl - double value to be convertedpublic static byte[] token(float flt)
flt - float value to be convertedpublic static byte[] chopNumber(byte[] token)
token - token to be modifiedpublic static double toDouble(byte[] token)
Double.NaN is returned if the input is invalid.
token - token to be converted
public static long toLong(java.lang.String string)
Long.MIN_VALUE is returned when the input is invalid.string - string to be convertedpublic static long toLong(byte[] token)
Long.MIN_VALUE is returned when the input is invalid.token - token to be convertedpublic static long toLong(byte[] token,
int start,
int end)
Long.MIN_VALUE is returned when the input is invalid.
token - token to be converted
start - first byte to be parsed
end - last byte to be parsed (exclusive)
public static int toInt(java.lang.String string)
Integer.MIN_VALUE is returned when the input is invalid.string - string to be convertedpublic static int toInt(byte[] token)
Integer.MIN_VALUE is returned when the input is invalid.token - token to be convertedpublic static int toInt(byte[] token,
int start,
int end)
Integer.MIN_VALUE is returned when the input is invalid.
token - token to be converted
start - first byte to be parsed
end - last byte to be parsed (exclusive)
public static int toSimpleInt(byte[] token)
Integer.MIN_VALUE is returned if non-digits are found
or if the input is longer than nine characters.token - token to be convertedpublic static int hash(byte[] token)
token - specified tokenpublic static boolean eq(byte[] token1,
byte[] token2)
token1 - first tokentoken2 - token to be comparedpublic static boolean eq(byte[] token,
byte[]... tokens)
token - tokentokens - tokens to be comparedpublic static boolean eq(java.lang.String str,
java.lang.String... strings)
str - first stringstrings - strings to be comparedpublic static boolean eqic(java.lang.String str,
java.lang.String... strings)
str - first stringstrings - strings to be comparedpublic static int diff(byte[] token,
byte[] compare)
token - first tokencompare - token to be comparedpublic static int diff(byte char1,
byte char2)
char1 - first characterchar2 - character to be comparedpublic static boolean contains(byte[] token,
byte[] sub)
token - tokensub - token to be foundpublic static boolean contains(byte[] token,
int c)
token - tokenc - character to be foundpublic static int indexOf(byte[] token,
int c)
token - tokenc - character to be foundpublic static int lastIndexOf(byte[] token,
int c)
token - tokenc - character to be foundpublic static int indexOf(byte[] token,
byte[] sub)
token - tokensub - token to be foundpublic static int indexOf(byte[] token,
byte[] sub,
int pos)
token - tokensub - token to be foundpos - start positionpublic static boolean startsWith(byte[] token,
int ch)
token - tokench - character to be foundpublic static boolean startsWith(byte[] token,
byte[] sub)
token - tokensub - token to be foundpublic static boolean endsWith(byte[] token,
int ch)
token - token
ch - character to be found
public static boolean endsWith(byte[] token,
byte[] sub)
token - tokensub - token to be foundpublic static byte[] substring(byte[] token,
int start)
Use subtoken(byte[], int) instead.
token - input token
start - start position
public static byte[] substring(byte[] token,
int start,
int end)
Use subtoken(byte[], int, int) instead.
token - input token
start - start position
end - end position
public static byte[] subtoken(byte[] token,
int start)
token - input tokenstart - start positionpublic static byte[] subtoken(byte[] token,
int start,
int end)
token - input textstart - start positionend - end positionpublic static byte[][] split(byte[] token,
int sep)
token - token to be splitsep - separation characterpublic static boolean ws(byte[] token)
token - tokenpublic static byte[] replace(byte[] token,
int search,
int replace)
token - token to be checkedsearch - the character to be replacedreplace - the new characterpublic static byte[] trim(byte[] token)
token - token to be trimmedpublic static byte[] chop(byte[] token,
int max)
token - token to be choppedmax - maximum lengthpublic static byte[] concat(byte[] token1,
byte[] token2)
token1 - first tokentoken2 - second tokenpublic static byte[] concat(byte[] token1,
byte[] token2,
byte[] token3)
A TokenBuilder instance can be used to
concatenate more than three tokens.
token1 - first token
token2 - second token
token3 - third token
public static byte[] delete(byte[] token,
int ch)
token - tokench - character to be removedpublic static byte[] norm(byte[] token)
token - tokenpublic static boolean ws(int ch)
ch - the character to be checked
public static boolean letter(int ch)
ch - the character to be checked
public static boolean digit(int ch)
ch - the character to be checked
public static boolean letterOrDigit(int ch)
ch - the character to be checked
public static boolean ftChar(int ch)
ch - character to be testedpublic static byte[] uc(byte[] token)
token - token to be convertedpublic static int uc(int ch)
ch - character to be convertedpublic static byte[] lc(byte[] token)
token - token to be convertedpublic static int lc(int ch)
ch - character to be convertedpublic static byte[] prefix(byte[] name)
name - namepublic static byte[] local(byte[] name)
name - namepublic static byte[] uri(byte[] token,
boolean iri)
token - tokeniri - inputpublic static byte[] escape(byte[] token)
token - tokenpublic static java.lang.String md5(java.lang.String string)
string - string to be hashedpublic static byte[] hex(byte[] val,
boolean uc)
val - values to be mappeduc - upper casepublic static int norm(int ch)
ch - character to be normalized