Skip to content

Commit

Permalink
Merge pull request #6614 from Marcono1234/marcono1234/char-literal-co…
Browse files Browse the repository at this point in the history
…depoint

Java: Add `CharacterLiteral.getCodePointValue()`
  • Loading branch information
aschackmull authored Nov 1, 2021
2 parents da5d10f + 301a907 commit 64acd02
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 59 deletions.
27 changes: 20 additions & 7 deletions java/ql/lib/semmle/code/java/Expr.qll
Original file line number Diff line number Diff line change
Expand Up @@ -298,18 +298,15 @@ class CompileTimeConstantExpr extends Expr {
*
* Note that this does not handle the following cases:
*
* - values of type `long`,
* - `char` literals.
* - values of type `long`.
*/
cached
int getIntValue() {
exists(IntegralType t | this.getType() = t | t.getName().toLowerCase() != "long") and
(
exists(string lit | lit = this.(Literal).getValue() |
// `char` literals may get parsed incorrectly, so disallow.
not this instanceof CharacterLiteral and
result = lit.toInt()
)
result = this.(IntegerLiteral).getIntValue()
or
result = this.(CharacterLiteral).getCodePointValue()
or
exists(CastExpr cast, int val |
cast = this and val = cast.getExpr().(CompileTimeConstantExpr).getIntValue()
Expand Down Expand Up @@ -719,6 +716,22 @@ class DoubleLiteral extends Literal, @doubleliteral {
/** A character literal. For example, `'\n'`. */
class CharacterLiteral extends Literal, @characterliteral {
override string getAPrimaryQlClass() { result = "CharacterLiteral" }

/**
* Gets a string which consists of the single character represented by
* this literal.
*
* Unicode surrogate characters (U+D800 to U+DFFF) have the replacement character
* U+FFFD as result instead.
*/
override string getValue() { result = super.getValue() }

/**
* Gets the Unicode code point value of the character represented by
* this literal. The result is the same as if the Java code had cast
* the character to an `int`.
*/
int getCodePointValue() { result.toUnicode() = this.getValue() }
}

/**
Expand Down
5 changes: 4 additions & 1 deletion java/ql/lib/semmle/code/java/Type.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,10 @@ predicate erasedHaveIntersection(RefType t1, RefType t2) {
t2 = erase(_)
}

/** An integral type, which may be either a primitive or a boxed type. */
/**
* An integral type, which may be either a primitive or a boxed type.
* This includes the types `char` and `Character`.
*/
class IntegralType extends Type {
IntegralType() {
exists(string name |
Expand Down
4 changes: 2 additions & 2 deletions java/ql/test/library-tests/constants/constants/Values.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ void values(final int notConstant) {
int binary_literal = 0b101010; //42
int negative_binary_literal = -0b101010; //-42
int binary_literal_underscores = 0b1_0101_0; //42
char char_literal = '*'; //Not handled
char char_literal = '*'; //42
long long_literal = 42L; //Not handled
boolean boolean_literal = true; //true
Integer boxed_int = new Integer(42); //Not handled
Expand All @@ -30,7 +30,7 @@ void values(final int notConstant) {
byte downcast_byte_4 = (byte) 214; // -42
byte downcast_byte_5 = (byte) (-214); // 42
short downcast_short = (short) 32768; // -32768
int cast_of_non_constant = (int) '*'; //Not handled
int cast_of_non_constant = (int) '*'; //42
long cast_to_long = (long) 42; //Not handled

int unary_plus = +42; //42
Expand Down
2 changes: 2 additions & 0 deletions java/ql/test/library-tests/constants/getIntValue.expected
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
| constants/Values.java:16:30:16:37 | 0b101010 | 42 |
| constants/Values.java:17:39:17:47 | -... | -42 |
| constants/Values.java:18:42:18:51 | 0b1_0101_0 | 42 |
| constants/Values.java:19:29:19:31 | '*' | 42 |
| constants/Values.java:25:20:25:27 | (...)... | 42 |
| constants/Values.java:26:25:26:33 | (...)... | 42 |
| constants/Values.java:27:32:27:43 | (...)... | -42 |
Expand All @@ -17,6 +18,7 @@
| constants/Values.java:30:32:30:41 | (...)... | -42 |
| constants/Values.java:31:32:31:44 | (...)... | 42 |
| constants/Values.java:32:32:32:44 | (...)... | -32768 |
| constants/Values.java:33:36:33:44 | (...)... | 42 |
| constants/Values.java:36:26:36:28 | +... | 42 |
| constants/Values.java:39:27:39:29 | -... | -42 |
| constants/Values.java:43:27:43:28 | ~... | -1 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class CharLiterals {
'\\',
'\'',
'\123', // octal escape sequence for 'S'
// CodeQL uses U+FFFD for unpaired surrogates, see https://github.com/github/codeql/issues/6611
'\uD800', // high surrogate
'\uDC00', // low surrogate
// Using Unicode escapes (which are handled during pre-processing)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
| CharLiterals.java:5:3:5:5 | 'a' | a |
| CharLiterals.java:6:3:6:10 | '\\u0061' | a |
| CharLiterals.java:7:3:7:10 | '\\u0000' | \u0000 |
| CharLiterals.java:8:3:8:10 | '\\uFFFF' | \uffff |
| CharLiterals.java:9:3:9:10 | '\\ufFfF' | \uffff |
| CharLiterals.java:10:3:10:6 | '\\0' | \u0000 |
| CharLiterals.java:11:3:11:6 | '\\n' | \n |
| CharLiterals.java:12:3:12:5 | '"' | " |
| CharLiterals.java:13:3:13:6 | '\\\\' | \\ |
| CharLiterals.java:14:3:14:6 | '\\'' | ' |
| CharLiterals.java:15:3:15:8 | '\\123' | S |
| CharLiterals.java:16:3:16:10 | '\\uD800' | \ufffd |
| CharLiterals.java:17:3:17:10 | '\\uDC00' | \ufffd |
| CharLiterals.java:19:3:19:16 | '\\u005C\\u005C' | \\ |
| CharLiterals.java:20:3:20:16 | '\\u005C\\u0027' | ' |
| CharLiterals.java:21:8:21:15 | 7a\\u0027 | a |
| CharLiterals.java:26:4:26:6 | 'a' | a |
| CharLiterals.java:27:4:27:6 | 'a' | a |
| CharLiterals.java:32:3:32:5 | 'a' | a |
| CharLiterals.java:32:9:32:11 | 'b' | b |
| CharLiterals.java:5:3:5:5 | 'a' | a | 97 |
| CharLiterals.java:6:3:6:10 | '\\u0061' | a | 97 |
| CharLiterals.java:7:3:7:10 | '\\u0000' | \u0000 | 0 |
| CharLiterals.java:8:3:8:10 | '\\uFFFF' | \uffff | 65535 |
| CharLiterals.java:9:3:9:10 | '\\ufFfF' | \uffff | 65535 |
| CharLiterals.java:10:3:10:6 | '\\0' | \u0000 | 0 |
| CharLiterals.java:11:3:11:6 | '\\n' | \n | 10 |
| CharLiterals.java:12:3:12:5 | '"' | " | 34 |
| CharLiterals.java:13:3:13:6 | '\\\\' | \\ | 92 |
| CharLiterals.java:14:3:14:6 | '\\'' | ' | 39 |
| CharLiterals.java:15:3:15:8 | '\\123' | S | 83 |
| CharLiterals.java:17:3:17:10 | '\\uD800' | \ufffd | 65533 |
| CharLiterals.java:18:3:18:10 | '\\uDC00' | \ufffd | 65533 |
| CharLiterals.java:20:3:20:16 | '\\u005C\\u005C' | \\ | 92 |
| CharLiterals.java:21:3:21:16 | '\\u005C\\u0027' | ' | 39 |
| CharLiterals.java:22:8:22:15 | 7a\\u0027 | a | 97 |
| CharLiterals.java:27:4:27:6 | 'a' | a | 97 |
| CharLiterals.java:28:4:28:6 | 'a' | a | 97 |
| CharLiterals.java:33:3:33:5 | 'a' | a | 97 |
| CharLiterals.java:33:9:33:11 | 'b' | b | 98 |
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import semmle.code.java.Expr

from CharacterLiteral lit
select lit, lit.getValue()
select lit, lit.getValue(), lit.getCodePointValue()
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class StringLiterals {
"\uD800\uDC00", // surrogate pair
"\uDBFF\uDFFF", // U+10FFFF
// Unpaired surrogates
// CodeQL uses U+FFFD for them, see https://github.com/github/codeql/issues/6611
"\uD800",
"\uDC00",
"hello\uD800hello\uDC00world", // malformed surrogates
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,32 @@
| StringLiterals.java:23:3:23:18 | "\\uaBcDeF\\u0aB1" | \uabcdeF\u0ab1 | \uabcdeF\u0ab1 | |
| StringLiterals.java:24:3:24:16 | "\\uD800\\uDC00" | \ud800\udc00 | \ud800\udc00 | |
| StringLiterals.java:25:3:25:16 | "\\uDBFF\\uDFFF" | \udbff\udfff | \udbff\udfff | |
| StringLiterals.java:27:3:27:10 | "\\uD800" | \ufffd | \ufffd | |
| StringLiterals.java:28:3:28:10 | "\\uDC00" | \ufffd | \ufffd | |
| StringLiterals.java:29:3:29:31 | "hello\\uD800hello\\uDC00world" | hello\ufffdhello\ufffdworld | hello\ufffdhello\ufffdworld | |
| StringLiterals.java:31:3:31:16 | "\\u005C\\u0022" | " | " | |
| StringLiterals.java:32:8:32:20 | 2\\u0061\\u0022 | a | a | |
| StringLiterals.java:37:3:39:5 | """ \t \n\t\ttest "text" and escaped \\u0022\n\t\t""" | test "text" and escaped "\n | test "text" and escaped "\n | text-block |
| StringLiterals.java:41:3:43:5 | """\n\t\t\tindented\n\t\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:44:3:46:5 | """\n\tno indentation last line\n\t\t""" | no indentation last line\n | no indentation last line\n | text-block |
| StringLiterals.java:47:3:49:7 | """\n\tindentation last line\n\t\t\\s""" | indentation last line\n\t | indentation last line\n\t | text-block |
| StringLiterals.java:50:3:52:6 | """\n\t\t\tnot-indented\n\t\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:53:3:55:4 | """\n\t\tindented\n\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:56:4:58:5 | """\n\t\tnot-indented\n\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:59:3:62:6 | """\n\t\t spaces (only single space is trimmed)\n\t\t\ttab\n\t\t\t""" | spaces (only single space is trimmed)\ntab\n | spaces (only single space is trimmed)\ntab\n | text-block |
| StringLiterals.java:63:3:64:22 | """\n\t\t\tend on same line""" | end on same line | end on same line | text-block |
| StringLiterals.java:65:3:68:5 | """\n\t\ttrailing spaces ignored: \t \n\t\tnot ignored: \t \\s\n\t\t""" | trailing spaces ignored:\nnot ignored: \t \n | trailing spaces ignored:\nnot ignored: \t \n | text-block |
| StringLiterals.java:69:3:70:18 | """\n\t\t3 quotes:""\\"""" | 3 quotes:""" | 3 quotes:""" | text-block |
| StringLiterals.java:71:3:74:5 | """\n\t\tline \\\n\t\tcontinuation \\\n\t\t""" | line continuation | line continuation | text-block |
| StringLiterals.java:75:3:79:5 | """\n\t\tExplicit line breaks:\\n\n\t\t\\r\\n\n\t\t\\r\n\t\t""" | Explicit line breaks:\n\n\r\n\n\r\n | Explicit line breaks:\n\n\r\n\n\r\n | text-block |
| StringLiterals.java:82:10:84:16 | 2"\\u0022\n\t\ttest\n\t\t\\u0022\\uu0022" | test\n | test\n | |
| StringLiterals.java:90:3:90:19 | "hello" + "world" | helloworld | helloworld | |
| StringLiterals.java:91:3:92:20 | """\n\t\thello""" + "world" | helloworld | helloworld | text-block |
| StringLiterals.java:93:10:93:12 | "a" | a | a | |
| StringLiterals.java:94:3:94:5 | "a" | a | a | |
| StringLiterals.java:28:3:28:10 | "\\uD800" | \ufffd | \ufffd | |
| StringLiterals.java:29:3:29:10 | "\\uDC00" | \ufffd | \ufffd | |
| StringLiterals.java:30:3:30:31 | "hello\\uD800hello\\uDC00world" | hello\ufffdhello\ufffdworld | hello\ufffdhello\ufffdworld | |
| StringLiterals.java:32:3:32:16 | "\\u005C\\u0022" | " | " | |
| StringLiterals.java:33:8:33:20 | 2\\u0061\\u0022 | a | a | |
| StringLiterals.java:38:3:40:5 | """ \t \n\t\ttest "text" and escaped \\u0022\n\t\t""" | test "text" and escaped "\n | test "text" and escaped "\n | text-block |
| StringLiterals.java:42:3:44:5 | """\n\t\t\tindented\n\t\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:45:3:47:5 | """\n\tno indentation last line\n\t\t""" | no indentation last line\n | no indentation last line\n | text-block |
| StringLiterals.java:48:3:50:7 | """\n\tindentation last line\n\t\t\\s""" | indentation last line\n\t | indentation last line\n\t | text-block |
| StringLiterals.java:51:3:53:6 | """\n\t\t\tnot-indented\n\t\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:54:3:56:4 | """\n\t\tindented\n\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:57:4:59:5 | """\n\t\tnot-indented\n\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:60:3:63:6 | """\n\t\t spaces (only single space is trimmed)\n\t\t\ttab\n\t\t\t""" | spaces (only single space is trimmed)\ntab\n | spaces (only single space is trimmed)\ntab\n | text-block |
| StringLiterals.java:64:3:65:22 | """\n\t\t\tend on same line""" | end on same line | end on same line | text-block |
| StringLiterals.java:66:3:69:5 | """\n\t\ttrailing spaces ignored: \t \n\t\tnot ignored: \t \\s\n\t\t""" | trailing spaces ignored:\nnot ignored: \t \n | trailing spaces ignored:\nnot ignored: \t \n | text-block |
| StringLiterals.java:70:3:71:18 | """\n\t\t3 quotes:""\\"""" | 3 quotes:""" | 3 quotes:""" | text-block |
| StringLiterals.java:72:3:75:5 | """\n\t\tline \\\n\t\tcontinuation \\\n\t\t""" | line continuation | line continuation | text-block |
| StringLiterals.java:76:3:80:5 | """\n\t\tExplicit line breaks:\\n\n\t\t\\r\\n\n\t\t\\r\n\t\t""" | Explicit line breaks:\n\n\r\n\n\r\n | Explicit line breaks:\n\n\r\n\n\r\n | text-block |
| StringLiterals.java:83:10:85:16 | 2"\\u0022\n\t\ttest\n\t\t\\u0022\\uu0022" | test\n | test\n | |
| StringLiterals.java:91:3:91:19 | "hello" + "world" | helloworld | helloworld | |
| StringLiterals.java:92:3:93:20 | """\n\t\thello""" + "world" | helloworld | helloworld | text-block |
| StringLiterals.java:94:10:94:12 | "a" | a | a | |
| StringLiterals.java:95:3:95:5 | "a" | a | a | |
| StringLiterals.java:96:7:96:9 | "a" | a | a | |
| StringLiterals.java:97:3:97:5 | "a" | a | a | |
| StringLiterals.java:98:10:98:12 | "a" | a | a | |
| StringLiterals.java:99:3:99:5 | "a" | a | a | |
| StringLiterals.java:100:9:100:11 | "a" | a | a | |
| StringLiterals.java:96:3:96:5 | "a" | a | a | |
| StringLiterals.java:97:7:97:9 | "a" | a | a | |
| StringLiterals.java:98:3:98:5 | "a" | a | a | |
| StringLiterals.java:99:10:99:12 | "a" | a | a | |
| StringLiterals.java:100:3:100:5 | "a" | a | a | |
| StringLiterals.java:101:9:101:11 | "a" | a | a | |

0 comments on commit 64acd02

Please sign in to comment.