mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-04-01 12:43:47 +00:00
[analyzer] TaintPropagation checker strlen() should not propagate (#66086)
strlen(..) call should not propagate taintedness, because it brings in many false positive findings. It is a common pattern to copy user provided input to another buffer. In these cases we always get warnings about tainted data used as the malloc parameter: buf = malloc(strlen(tainted_txt) + 1); // false warning This pattern can lead to a denial of service attack only, when the attacker can directly specify the size of the allocated area as an arbitrary large number (e.g. the value is converted from a user provided string). Later, we could reintroduce strlen() as a taint propagating function with the consideration not to emit warnings when the tainted value cannot be "arbitrarily large" (such as the size of an already allocated memory block). The change has been evaluated on the following open source projects: - memcached: [1 lost false positive](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=memcached_1.6.8_ednikru_taint_nostrlen_baseline&newcheck=memcached_1.6.8_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved) - tmux: 0 lost reports - twin [3 lost false positives](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=twin_v0.8.1_ednikru_taint_nostrlen_baseline&newcheck=twin_v0.8.1_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved) - vim [1 lost false positive](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=vim_v8.2.1920_ednikru_taint_nostrlen_baseline&newcheck=vim_v8.2.1920_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved) - openssl 0 lost reports - sqliste [2 lost false positives](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=sqlite_version-3.33.0_ednikru_taint_nostrlen_baseline&newcheck=sqlite_version-3.33.0_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved) - ffmpeg 0 lost repots - postgresql [3 lost false positives](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=postgres_REL_13_0_ednikru_taint_nostrlen_baseline&newcheck=postgres_REL_13_0_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved) - tinyxml 0 lost reports - libwebm 0 lost reports - xerces 0 lost reports In all cases the lost reports are originating from copying untrusted environment variables into another buffer. There are 2 types of lost false positive reports: 1) [Where the warning is emitted at the malloc call by the TaintPropagation Checker ](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=memcached_1.6.8_ednikru_taint_nostrlen_baseline&newcheck=memcached_1.6.8_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved&report-id=2648506&report-hash=2079221954026f17e1ecb614f5f054db&report-filepath=%2amemcached.c) ` len = strlen(portnumber_filename)+4+1; temp_portnumber_filename = malloc(len); ` 2) When pointers are set based on the length of the tainted string by the ArrayOutofBoundsv2 checker. For example [this ](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=vim_v8.2.1920_ednikru_taint_nostrlen_baseline&newcheck=vim_v8.2.1920_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved&report-id=2649310&report-hash=79dc8522d2cd34ca8e1b2dc2db64b2df&report-filepath=%2aos_unix.c)case.
This commit is contained in:
parent
c809051215
commit
97495d3159
@ -450,6 +450,13 @@ Static Analyzer
|
||||
bitwise shift operators produce undefined behavior (because some operand is
|
||||
negative or too large).
|
||||
|
||||
- The ``alpha.security.taint.TaintPropagation`` checker no longer propagates
|
||||
taint on ``strlen`` and ``strnlen`` calls, unless these are marked
|
||||
explicitly propagators in the user-provided taint configuration file.
|
||||
This removal empirically reduces the number of false positive reports.
|
||||
Read the PR for the details.
|
||||
(`#66086 <https://github.com/llvm/llvm-project/pull/66086>`_)
|
||||
|
||||
.. _release-notes-sanitizers:
|
||||
|
||||
Sanitizers
|
||||
|
@ -2599,8 +2599,8 @@ Default propagations rules:
|
||||
``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``,
|
||||
``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``,
|
||||
``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``,
|
||||
``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``,
|
||||
``strndupa``, ``strnlen``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
|
||||
``strcspn``, ``strncasecmp``, ``strncmp``, ``strndup``,
|
||||
``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
|
||||
``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``,
|
||||
``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth``
|
||||
|
||||
|
@ -695,9 +695,10 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
|
||||
{{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
|
||||
{{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
|
||||
{{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
|
||||
{{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
|
||||
{{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
|
||||
{{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
|
||||
|
||||
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
|
||||
// See the details here: https://github.com/llvm/llvm-project/pull/66086
|
||||
|
||||
{{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
|
||||
{{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
|
||||
{{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
|
||||
|
@ -10,6 +10,7 @@ int scanf(const char *restrict format, ...);
|
||||
int system(const char *command);
|
||||
char* getenv( const char* env_var );
|
||||
size_t strlen( const char* str );
|
||||
int atoi( const char* str );
|
||||
void *malloc(size_t size );
|
||||
void free( void *ptr );
|
||||
char *fgets(char *str, int n, FILE *stream);
|
||||
@ -54,11 +55,11 @@ void taintDiagnosticVLA(void) {
|
||||
// propagating through variables and expressions
|
||||
char *taintDiagnosticPropagation(){
|
||||
char *pathbuf;
|
||||
char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
|
||||
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
|
||||
// expected-note@-1 {{Taint propagated to the return value}}
|
||||
if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
|
||||
if (size){ // expected-note {{Assuming 'size' is non-null}}
|
||||
// expected-note@-1 {{Taking true branch}}
|
||||
pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
|
||||
pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is used to specify the buffer size}}
|
||||
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
|
||||
// expected-note@-2 {{Taint propagated to the return value}}
|
||||
return pathbuf;
|
||||
@ -71,12 +72,12 @@ char *taintDiagnosticPropagation(){
|
||||
char *taintDiagnosticPropagation2(){
|
||||
char *pathbuf;
|
||||
char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
|
||||
char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
|
||||
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
|
||||
// expected-note@-1 {{Taint propagated to the return value}}
|
||||
char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
|
||||
if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
|
||||
if (size){ // expected-note {{Assuming 'size' is non-null}}
|
||||
// expected-note@-1 {{Taking true branch}}
|
||||
pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
|
||||
pathbuf=(char*) malloc(atoi(size)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
|
||||
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
|
||||
// expected-note@-2 {{Taint propagated to the return value}}
|
||||
return pathbuf;
|
||||
|
@ -443,14 +443,18 @@ int testSprintf_propagates_taint(char *buf, char *msg) {
|
||||
return 1 / x; // expected-warning {{Division by a tainted value, possibly zero}}
|
||||
}
|
||||
|
||||
void test_wchar_apis_propagate(const char *path) {
|
||||
void test_wchar_apis_dont_propagate(const char *path) {
|
||||
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
|
||||
// See the details here: https://github.com/llvm/llvm-project/pull/66086
|
||||
// This isn't ideal, but this is only what we have now.
|
||||
|
||||
FILE *f = fopen(path, "r");
|
||||
clang_analyzer_isTainted_charp((char*)f); // expected-warning {{YES}}
|
||||
wchar_t wbuf[10];
|
||||
fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f);
|
||||
clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}}
|
||||
int n = wcslen(wbuf);
|
||||
clang_analyzer_isTainted_int(n); // expected-warning {{YES}}
|
||||
clang_analyzer_isTainted_int(n); // expected-warning {{NO}}
|
||||
|
||||
wchar_t dst[100] = L"ABC";
|
||||
clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}}
|
||||
@ -458,7 +462,7 @@ void test_wchar_apis_propagate(const char *path) {
|
||||
clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}}
|
||||
|
||||
int m = wcslen(dst);
|
||||
clang_analyzer_isTainted_int(m); // expected-warning {{YES}}
|
||||
clang_analyzer_isTainted_int(m); // expected-warning {{NO}}
|
||||
}
|
||||
|
||||
int scanf_s(const char *format, ...);
|
||||
@ -948,21 +952,27 @@ void testStrndupa(size_t n) {
|
||||
}
|
||||
|
||||
size_t strlen(const char *s);
|
||||
void testStrlen() {
|
||||
void testStrlen_dont_propagate() {
|
||||
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
|
||||
// See the details here: https://github.com/llvm/llvm-project/pull/66086
|
||||
// This isn't ideal, but this is only what we have now.
|
||||
char s[10];
|
||||
scanf("%9s", s);
|
||||
|
||||
size_t result = strlen(s);
|
||||
clang_analyzer_isTainted_int(result); // expected-warning {{YES}}
|
||||
// strlen propagating taint would bring in many false positives
|
||||
clang_analyzer_isTainted_int(result); // expected-warning {{NO}}
|
||||
}
|
||||
|
||||
size_t strnlen(const char *s, size_t maxlen);
|
||||
void testStrnlen(size_t maxlen) {
|
||||
void testStrnlen_dont_propagate(size_t maxlen) {
|
||||
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
|
||||
// See the details here: https://github.com/llvm/llvm-project/pull/66086
|
||||
// This isn't ideal, but this is only what we have now.
|
||||
char s[10];
|
||||
scanf("%9s", s);
|
||||
|
||||
size_t result = strnlen(s, maxlen);
|
||||
clang_analyzer_isTainted_int(result); // expected-warning {{YES}}
|
||||
clang_analyzer_isTainted_int(result); // expected-warning {{NO}}
|
||||
}
|
||||
|
||||
long strtol(const char *restrict nptr, char **restrict endptr, int base);
|
||||
|
Loading…
x
Reference in New Issue
Block a user