1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 using namespace clang;
29 using namespace ento;
30
31 namespace {
32 class CStringChecker : public Checker< eval::Call,
33 check::PreStmt<DeclStmt>,
34 check::LiveSymbols,
35 check::DeadSymbols,
36 check::RegionChanges
37 > {
38 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
39 BT_NotCString, BT_AdditionOverflow;
40
41 mutable const char *CurrentFunctionDescription;
42
43 public:
44 /// The filter is used to filter out the diagnostics which are not enabled by
45 /// the user.
46 struct CStringChecksFilter {
47 DefaultBool CheckCStringNullArg;
48 DefaultBool CheckCStringOutOfBounds;
49 DefaultBool CheckCStringBufferOverlap;
50 DefaultBool CheckCStringNotNullTerm;
51
52 CheckName CheckNameCStringNullArg;
53 CheckName CheckNameCStringOutOfBounds;
54 CheckName CheckNameCStringBufferOverlap;
55 CheckName CheckNameCStringNotNullTerm;
56 };
57
58 CStringChecksFilter Filter;
59
getTag()60 static void *getTag() { static int tag; return &tag; }
61
62 bool evalCall(const CallExpr *CE, CheckerContext &C) const;
63 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
64 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
65 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
66 bool wantsRegionChangeUpdate(ProgramStateRef state) const;
67
68 ProgramStateRef
69 checkRegionChanges(ProgramStateRef state,
70 const InvalidatedSymbols *,
71 ArrayRef<const MemRegion *> ExplicitRegions,
72 ArrayRef<const MemRegion *> Regions,
73 const CallEvent *Call) const;
74
75 typedef void (CStringChecker::*FnCheck)(CheckerContext &,
76 const CallExpr *) const;
77
78 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
79 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
80 void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
81 void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
82 void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
83 ProgramStateRef state,
84 const Expr *Size,
85 const Expr *Source,
86 const Expr *Dest,
87 bool Restricted = false,
88 bool IsMempcpy = false) const;
89
90 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
91
92 void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
93 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
94 void evalstrLengthCommon(CheckerContext &C,
95 const CallExpr *CE,
96 bool IsStrnlen = false) const;
97
98 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
99 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
100 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
101 void evalStrcpyCommon(CheckerContext &C,
102 const CallExpr *CE,
103 bool returnEnd,
104 bool isBounded,
105 bool isAppending) const;
106
107 void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
108 void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
109
110 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
111 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
112 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
113 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
114 void evalStrcmpCommon(CheckerContext &C,
115 const CallExpr *CE,
116 bool isBounded = false,
117 bool ignoreCase = false) const;
118
119 void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
120
121 // Utility methods
122 std::pair<ProgramStateRef , ProgramStateRef >
123 static assumeZero(CheckerContext &C,
124 ProgramStateRef state, SVal V, QualType Ty);
125
126 static ProgramStateRef setCStringLength(ProgramStateRef state,
127 const MemRegion *MR,
128 SVal strLength);
129 static SVal getCStringLengthForRegion(CheckerContext &C,
130 ProgramStateRef &state,
131 const Expr *Ex,
132 const MemRegion *MR,
133 bool hypothetical);
134 SVal getCStringLength(CheckerContext &C,
135 ProgramStateRef &state,
136 const Expr *Ex,
137 SVal Buf,
138 bool hypothetical = false) const;
139
140 const StringLiteral *getCStringLiteral(CheckerContext &C,
141 ProgramStateRef &state,
142 const Expr *expr,
143 SVal val) const;
144
145 static ProgramStateRef InvalidateBuffer(CheckerContext &C,
146 ProgramStateRef state,
147 const Expr *Ex, SVal V,
148 bool IsSourceBuffer);
149
150 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
151 const MemRegion *MR);
152
153 // Re-usable checks
154 ProgramStateRef checkNonNull(CheckerContext &C,
155 ProgramStateRef state,
156 const Expr *S,
157 SVal l) const;
158 ProgramStateRef CheckLocation(CheckerContext &C,
159 ProgramStateRef state,
160 const Expr *S,
161 SVal l,
162 const char *message = nullptr) const;
163 ProgramStateRef CheckBufferAccess(CheckerContext &C,
164 ProgramStateRef state,
165 const Expr *Size,
166 const Expr *FirstBuf,
167 const Expr *SecondBuf,
168 const char *firstMessage = nullptr,
169 const char *secondMessage = nullptr,
170 bool WarnAboutSize = false) const;
171
CheckBufferAccess(CheckerContext & C,ProgramStateRef state,const Expr * Size,const Expr * Buf,const char * message=nullptr,bool WarnAboutSize=false) const172 ProgramStateRef CheckBufferAccess(CheckerContext &C,
173 ProgramStateRef state,
174 const Expr *Size,
175 const Expr *Buf,
176 const char *message = nullptr,
177 bool WarnAboutSize = false) const {
178 // This is a convenience override.
179 return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
180 WarnAboutSize);
181 }
182 ProgramStateRef CheckOverlap(CheckerContext &C,
183 ProgramStateRef state,
184 const Expr *Size,
185 const Expr *First,
186 const Expr *Second) const;
187 void emitOverlapBug(CheckerContext &C,
188 ProgramStateRef state,
189 const Stmt *First,
190 const Stmt *Second) const;
191
192 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
193 ProgramStateRef state,
194 NonLoc left,
195 NonLoc right) const;
196 };
197
198 } //end anonymous namespace
199
// Program-state map named CStringLength, keyed by memory region and holding
// one SVal per region. The checker code in this file accesses it through
// state->get/set/remove<CStringLength>() to remember C string lengths.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
201
202 //===----------------------------------------------------------------------===//
203 // Individual checks and utility methods.
204 //===----------------------------------------------------------------------===//
205
206 std::pair<ProgramStateRef , ProgramStateRef >
207 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
208 QualType Ty) {
209 Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
210 if (!val)
211 return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
212
213 SValBuilder &svalBuilder = C.getSValBuilder();
214 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
215 return state->assume(svalBuilder.evalEQ(state, *val, zero));
216 }
217
checkNonNull(CheckerContext & C,ProgramStateRef state,const Expr * S,SVal l) const218 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
219 ProgramStateRef state,
220 const Expr *S, SVal l) const {
221 // If a previous check has failed, propagate the failure.
222 if (!state)
223 return nullptr;
224
225 ProgramStateRef stateNull, stateNonNull;
226 std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
227
228 if (stateNull && !stateNonNull) {
229 if (!Filter.CheckCStringNullArg)
230 return nullptr;
231
232 ExplodedNode *N = C.generateSink(stateNull);
233 if (!N)
234 return nullptr;
235
236 if (!BT_Null)
237 BT_Null.reset(new BuiltinBug(
238 Filter.CheckNameCStringNullArg, categories::UnixAPI,
239 "Null pointer argument in call to byte string function"));
240
241 SmallString<80> buf;
242 llvm::raw_svector_ostream os(buf);
243 assert(CurrentFunctionDescription);
244 os << "Null pointer argument in call to " << CurrentFunctionDescription;
245
246 // Generate a report for this bug.
247 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
248 auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
249
250 report->addRange(S->getSourceRange());
251 bugreporter::trackNullOrUndefValue(N, S, *report);
252 C.emitReport(std::move(report));
253 return nullptr;
254 }
255
256 // From here on, assume that the value is non-null.
257 assert(stateNonNull);
258 return stateNonNull;
259 }
260
261 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
CheckLocation(CheckerContext & C,ProgramStateRef state,const Expr * S,SVal l,const char * warningMsg) const262 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
263 ProgramStateRef state,
264 const Expr *S, SVal l,
265 const char *warningMsg) const {
266 // If a previous check has failed, propagate the failure.
267 if (!state)
268 return nullptr;
269
270 // Check for out of bound array element access.
271 const MemRegion *R = l.getAsRegion();
272 if (!R)
273 return state;
274
275 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
276 if (!ER)
277 return state;
278
279 assert(ER->getValueType() == C.getASTContext().CharTy &&
280 "CheckLocation should only be called with char* ElementRegions");
281
282 // Get the size of the array.
283 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
284 SValBuilder &svalBuilder = C.getSValBuilder();
285 SVal Extent =
286 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
287 DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
288
289 // Get the index of the accessed element.
290 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
291
292 ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
293 ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
294 if (StOutBound && !StInBound) {
295 ExplodedNode *N = C.generateSink(StOutBound);
296 if (!N)
297 return nullptr;
298
299 if (!BT_Bounds) {
300 BT_Bounds.reset(new BuiltinBug(
301 Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
302 "Byte string function accesses out-of-bound array element"));
303 }
304 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
305
306 // Generate a report for this bug.
307 std::unique_ptr<BugReport> report;
308 if (warningMsg) {
309 report = llvm::make_unique<BugReport>(*BT, warningMsg, N);
310 } else {
311 assert(CurrentFunctionDescription);
312 assert(CurrentFunctionDescription[0] != '\0');
313
314 SmallString<80> buf;
315 llvm::raw_svector_ostream os(buf);
316 os << toUppercase(CurrentFunctionDescription[0])
317 << &CurrentFunctionDescription[1]
318 << " accesses out-of-bound array element";
319 report = llvm::make_unique<BugReport>(*BT, os.str(), N);
320 }
321
322 // FIXME: It would be nice to eventually make this diagnostic more clear,
323 // e.g., by referencing the original declaration or by saying *why* this
324 // reference is outside the range.
325
326 report->addRange(S->getSourceRange());
327 C.emitReport(std::move(report));
328 return nullptr;
329 }
330
331 // Array bound check succeeded. From this point forward the array bound
332 // should always succeed.
333 return StInBound;
334 }
335
CheckBufferAccess(CheckerContext & C,ProgramStateRef state,const Expr * Size,const Expr * FirstBuf,const Expr * SecondBuf,const char * firstMessage,const char * secondMessage,bool WarnAboutSize) const336 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
337 ProgramStateRef state,
338 const Expr *Size,
339 const Expr *FirstBuf,
340 const Expr *SecondBuf,
341 const char *firstMessage,
342 const char *secondMessage,
343 bool WarnAboutSize) const {
344 // If a previous check has failed, propagate the failure.
345 if (!state)
346 return nullptr;
347
348 SValBuilder &svalBuilder = C.getSValBuilder();
349 ASTContext &Ctx = svalBuilder.getContext();
350 const LocationContext *LCtx = C.getLocationContext();
351
352 QualType sizeTy = Size->getType();
353 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
354
355 // Check that the first buffer is non-null.
356 SVal BufVal = state->getSVal(FirstBuf, LCtx);
357 state = checkNonNull(C, state, FirstBuf, BufVal);
358 if (!state)
359 return nullptr;
360
361 // If out-of-bounds checking is turned off, skip the rest.
362 if (!Filter.CheckCStringOutOfBounds)
363 return state;
364
365 // Get the access length and make sure it is known.
366 // FIXME: This assumes the caller has already checked that the access length
367 // is positive. And that it's unsigned.
368 SVal LengthVal = state->getSVal(Size, LCtx);
369 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
370 if (!Length)
371 return state;
372
373 // Compute the offset of the last element to be accessed: size-1.
374 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
375 NonLoc LastOffset = svalBuilder
376 .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
377
378 // Check that the first buffer is sufficiently long.
379 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
380 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
381 const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
382
383 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
384 LastOffset, PtrTy);
385 state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
386
387 // If the buffer isn't large enough, abort.
388 if (!state)
389 return nullptr;
390 }
391
392 // If there's a second buffer, check it as well.
393 if (SecondBuf) {
394 BufVal = state->getSVal(SecondBuf, LCtx);
395 state = checkNonNull(C, state, SecondBuf, BufVal);
396 if (!state)
397 return nullptr;
398
399 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
400 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
401 const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
402
403 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
404 LastOffset, PtrTy);
405 state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
406 }
407 }
408
409 // Large enough or not, return this state!
410 return state;
411 }
412
CheckOverlap(CheckerContext & C,ProgramStateRef state,const Expr * Size,const Expr * First,const Expr * Second) const413 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
414 ProgramStateRef state,
415 const Expr *Size,
416 const Expr *First,
417 const Expr *Second) const {
418 if (!Filter.CheckCStringBufferOverlap)
419 return state;
420
421 // Do a simple check for overlap: if the two arguments are from the same
422 // buffer, see if the end of the first is greater than the start of the second
423 // or vice versa.
424
425 // If a previous check has failed, propagate the failure.
426 if (!state)
427 return nullptr;
428
429 ProgramStateRef stateTrue, stateFalse;
430
431 // Get the buffer values and make sure they're known locations.
432 const LocationContext *LCtx = C.getLocationContext();
433 SVal firstVal = state->getSVal(First, LCtx);
434 SVal secondVal = state->getSVal(Second, LCtx);
435
436 Optional<Loc> firstLoc = firstVal.getAs<Loc>();
437 if (!firstLoc)
438 return state;
439
440 Optional<Loc> secondLoc = secondVal.getAs<Loc>();
441 if (!secondLoc)
442 return state;
443
444 // Are the two values the same?
445 SValBuilder &svalBuilder = C.getSValBuilder();
446 std::tie(stateTrue, stateFalse) =
447 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
448
449 if (stateTrue && !stateFalse) {
450 // If the values are known to be equal, that's automatically an overlap.
451 emitOverlapBug(C, stateTrue, First, Second);
452 return nullptr;
453 }
454
455 // assume the two expressions are not equal.
456 assert(stateFalse);
457 state = stateFalse;
458
459 // Which value comes first?
460 QualType cmpTy = svalBuilder.getConditionType();
461 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
462 *firstLoc, *secondLoc, cmpTy);
463 Optional<DefinedOrUnknownSVal> reverseTest =
464 reverse.getAs<DefinedOrUnknownSVal>();
465 if (!reverseTest)
466 return state;
467
468 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
469 if (stateTrue) {
470 if (stateFalse) {
471 // If we don't know which one comes first, we can't perform this test.
472 return state;
473 } else {
474 // Switch the values so that firstVal is before secondVal.
475 std::swap(firstLoc, secondLoc);
476
477 // Switch the Exprs as well, so that they still correspond.
478 std::swap(First, Second);
479 }
480 }
481
482 // Get the length, and make sure it too is known.
483 SVal LengthVal = state->getSVal(Size, LCtx);
484 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
485 if (!Length)
486 return state;
487
488 // Convert the first buffer's start address to char*.
489 // Bail out if the cast fails.
490 ASTContext &Ctx = svalBuilder.getContext();
491 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
492 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
493 First->getType());
494 Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
495 if (!FirstStartLoc)
496 return state;
497
498 // Compute the end of the first buffer. Bail out if THAT fails.
499 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
500 *FirstStartLoc, *Length, CharPtrTy);
501 Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
502 if (!FirstEndLoc)
503 return state;
504
505 // Is the end of the first buffer past the start of the second buffer?
506 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
507 *FirstEndLoc, *secondLoc, cmpTy);
508 Optional<DefinedOrUnknownSVal> OverlapTest =
509 Overlap.getAs<DefinedOrUnknownSVal>();
510 if (!OverlapTest)
511 return state;
512
513 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
514
515 if (stateTrue && !stateFalse) {
516 // Overlap!
517 emitOverlapBug(C, stateTrue, First, Second);
518 return nullptr;
519 }
520
521 // assume the two expressions don't overlap.
522 assert(stateFalse);
523 return stateFalse;
524 }
525
emitOverlapBug(CheckerContext & C,ProgramStateRef state,const Stmt * First,const Stmt * Second) const526 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
527 const Stmt *First, const Stmt *Second) const {
528 ExplodedNode *N = C.generateSink(state);
529 if (!N)
530 return;
531
532 if (!BT_Overlap)
533 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
534 categories::UnixAPI, "Improper arguments"));
535
536 // Generate a report for this bug.
537 auto report = llvm::make_unique<BugReport>(
538 *BT_Overlap, "Arguments must not be overlapping buffers", N);
539 report->addRange(First->getSourceRange());
540 report->addRange(Second->getSourceRange());
541
542 C.emitReport(std::move(report));
543 }
544
checkAdditionOverflow(CheckerContext & C,ProgramStateRef state,NonLoc left,NonLoc right) const545 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
546 ProgramStateRef state,
547 NonLoc left,
548 NonLoc right) const {
549 // If out-of-bounds checking is turned off, skip the rest.
550 if (!Filter.CheckCStringOutOfBounds)
551 return state;
552
553 // If a previous check has failed, propagate the failure.
554 if (!state)
555 return nullptr;
556
557 SValBuilder &svalBuilder = C.getSValBuilder();
558 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
559
560 QualType sizeTy = svalBuilder.getContext().getSizeType();
561 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
562 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
563
564 SVal maxMinusRight;
565 if (right.getAs<nonloc::ConcreteInt>()) {
566 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
567 sizeTy);
568 } else {
569 // Try switching the operands. (The order of these two assignments is
570 // important!)
571 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
572 sizeTy);
573 left = right;
574 }
575
576 if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
577 QualType cmpTy = svalBuilder.getConditionType();
578 // If left > max - right, we have an overflow.
579 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
580 *maxMinusRightNL, cmpTy);
581
582 ProgramStateRef stateOverflow, stateOkay;
583 std::tie(stateOverflow, stateOkay) =
584 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
585
586 if (stateOverflow && !stateOkay) {
587 // We have an overflow. Emit a bug report.
588 ExplodedNode *N = C.generateSink(stateOverflow);
589 if (!N)
590 return nullptr;
591
592 if (!BT_AdditionOverflow)
593 BT_AdditionOverflow.reset(
594 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
595 "Sum of expressions causes overflow"));
596
597 // This isn't a great error message, but this should never occur in real
598 // code anyway -- you'd have to create a buffer longer than a size_t can
599 // represent, which is sort of a contradiction.
600 const char *warning =
601 "This expression will create a string whose length is too big to "
602 "be represented as a size_t";
603
604 // Generate a report for this bug.
605 C.emitReport(
606 llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N));
607
608 return nullptr;
609 }
610
611 // From now on, assume an overflow didn't occur.
612 assert(stateOkay);
613 state = stateOkay;
614 }
615
616 return state;
617 }
618
setCStringLength(ProgramStateRef state,const MemRegion * MR,SVal strLength)619 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
620 const MemRegion *MR,
621 SVal strLength) {
622 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
623
624 MR = MR->StripCasts();
625
626 switch (MR->getKind()) {
627 case MemRegion::StringRegionKind:
628 // FIXME: This can happen if we strcpy() into a string region. This is
629 // undefined [C99 6.4.5p6], but we should still warn about it.
630 return state;
631
632 case MemRegion::SymbolicRegionKind:
633 case MemRegion::AllocaRegionKind:
634 case MemRegion::VarRegionKind:
635 case MemRegion::FieldRegionKind:
636 case MemRegion::ObjCIvarRegionKind:
637 // These are the types we can currently track string lengths for.
638 break;
639
640 case MemRegion::ElementRegionKind:
641 // FIXME: Handle element regions by upper-bounding the parent region's
642 // string length.
643 return state;
644
645 default:
646 // Other regions (mostly non-data) can't have a reliable C string length.
647 // For now, just ignore the change.
648 // FIXME: These are rare but not impossible. We should output some kind of
649 // warning for things like strcpy((char[]){'a', 0}, "b");
650 return state;
651 }
652
653 if (strLength.isUnknown())
654 return state->remove<CStringLength>(MR);
655
656 return state->set<CStringLength>(MR, strLength);
657 }
658
getCStringLengthForRegion(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,const MemRegion * MR,bool hypothetical)659 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
660 ProgramStateRef &state,
661 const Expr *Ex,
662 const MemRegion *MR,
663 bool hypothetical) {
664 if (!hypothetical) {
665 // If there's a recorded length, go ahead and return it.
666 const SVal *Recorded = state->get<CStringLength>(MR);
667 if (Recorded)
668 return *Recorded;
669 }
670
671 // Otherwise, get a new symbol and update the state.
672 SValBuilder &svalBuilder = C.getSValBuilder();
673 QualType sizeTy = svalBuilder.getContext().getSizeType();
674 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
675 MR, Ex, sizeTy,
676 C.blockCount());
677
678 if (!hypothetical) {
679 if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
680 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
681 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
682 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
683 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
684 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
685 fourInt);
686 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
687 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
688 maxLength, sizeTy);
689 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
690 }
691 state = state->set<CStringLength>(MR, strLength);
692 }
693
694 return strLength;
695 }
696
getCStringLength(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,SVal Buf,bool hypothetical) const697 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
698 const Expr *Ex, SVal Buf,
699 bool hypothetical) const {
700 const MemRegion *MR = Buf.getAsRegion();
701 if (!MR) {
702 // If we can't get a region, see if it's something we /know/ isn't a
703 // C string. In the context of locations, the only time we can issue such
704 // a warning is for labels.
705 if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
706 if (!Filter.CheckCStringNotNullTerm)
707 return UndefinedVal();
708
709 if (ExplodedNode *N = C.addTransition(state)) {
710 if (!BT_NotCString)
711 BT_NotCString.reset(new BuiltinBug(
712 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
713 "Argument is not a null-terminated string."));
714
715 SmallString<120> buf;
716 llvm::raw_svector_ostream os(buf);
717 assert(CurrentFunctionDescription);
718 os << "Argument to " << CurrentFunctionDescription
719 << " is the address of the label '" << Label->getLabel()->getName()
720 << "', which is not a null-terminated string";
721
722 // Generate a report for this bug.
723 auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
724
725 report->addRange(Ex->getSourceRange());
726 C.emitReport(std::move(report));
727 }
728 return UndefinedVal();
729
730 }
731
732 // If it's not a region and not a label, give up.
733 return UnknownVal();
734 }
735
736 // If we have a region, strip casts from it and see if we can figure out
737 // its length. For anything we can't figure out, just return UnknownVal.
738 MR = MR->StripCasts();
739
740 switch (MR->getKind()) {
741 case MemRegion::StringRegionKind: {
742 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
743 // so we can assume that the byte length is the correct C string length.
744 SValBuilder &svalBuilder = C.getSValBuilder();
745 QualType sizeTy = svalBuilder.getContext().getSizeType();
746 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
747 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
748 }
749 case MemRegion::SymbolicRegionKind:
750 case MemRegion::AllocaRegionKind:
751 case MemRegion::VarRegionKind:
752 case MemRegion::FieldRegionKind:
753 case MemRegion::ObjCIvarRegionKind:
754 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
755 case MemRegion::CompoundLiteralRegionKind:
756 // FIXME: Can we track this? Is it necessary?
757 return UnknownVal();
758 case MemRegion::ElementRegionKind:
759 // FIXME: How can we handle this? It's not good enough to subtract the
760 // offset from the base string length; consider "123\x00567" and &a[5].
761 return UnknownVal();
762 default:
763 // Other regions (mostly non-data) can't have a reliable C string length.
764 // In this case, an error is emitted and UndefinedVal is returned.
765 // The caller should always be prepared to handle this case.
766 if (!Filter.CheckCStringNotNullTerm)
767 return UndefinedVal();
768
769 if (ExplodedNode *N = C.addTransition(state)) {
770 if (!BT_NotCString)
771 BT_NotCString.reset(new BuiltinBug(
772 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
773 "Argument is not a null-terminated string."));
774
775 SmallString<120> buf;
776 llvm::raw_svector_ostream os(buf);
777
778 assert(CurrentFunctionDescription);
779 os << "Argument to " << CurrentFunctionDescription << " is ";
780
781 if (SummarizeRegion(os, C.getASTContext(), MR))
782 os << ", which is not a null-terminated string";
783 else
784 os << "not a null-terminated string";
785
786 // Generate a report for this bug.
787 auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
788
789 report->addRange(Ex->getSourceRange());
790 C.emitReport(std::move(report));
791 }
792
793 return UndefinedVal();
794 }
795 }
796
getCStringLiteral(CheckerContext & C,ProgramStateRef & state,const Expr * expr,SVal val) const797 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
798 ProgramStateRef &state, const Expr *expr, SVal val) const {
799
800 // Get the memory region pointed to by the val.
801 const MemRegion *bufRegion = val.getAsRegion();
802 if (!bufRegion)
803 return nullptr;
804
805 // Strip casts off the memory region.
806 bufRegion = bufRegion->StripCasts();
807
808 // Cast the memory region to a string region.
809 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
810 if (!strRegion)
811 return nullptr;
812
813 // Return the actual string in the string region.
814 return strRegion->getStringLiteral();
815 }
816
InvalidateBuffer(CheckerContext & C,ProgramStateRef state,const Expr * E,SVal V,bool IsSourceBuffer)817 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
818 ProgramStateRef state,
819 const Expr *E, SVal V,
820 bool IsSourceBuffer) {
821 Optional<Loc> L = V.getAs<Loc>();
822 if (!L)
823 return state;
824
825 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
826 // some assumptions about the value that CFRefCount can't. Even so, it should
827 // probably be refactored.
828 if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
829 const MemRegion *R = MR->getRegion()->StripCasts();
830
831 // Are we dealing with an ElementRegion? If so, we should be invalidating
832 // the super-region.
833 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
834 R = ER->getSuperRegion();
835 // FIXME: What about layers of ElementRegions?
836 }
837
838 // Invalidate this region.
839 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
840
841 bool CausesPointerEscape = false;
842 RegionAndSymbolInvalidationTraits ITraits;
843 // Invalidate and escape only indirect regions accessible through the source
844 // buffer.
845 if (IsSourceBuffer) {
846 ITraits.setTrait(R,
847 RegionAndSymbolInvalidationTraits::TK_PreserveContents);
848 ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
849 CausesPointerEscape = true;
850 }
851
852 return state->invalidateRegions(R, E, C.blockCount(), LCtx,
853 CausesPointerEscape, nullptr, nullptr,
854 &ITraits);
855 }
856
857 // If we have a non-region value by chance, just remove the binding.
858 // FIXME: is this necessary or correct? This handles the non-Region
859 // cases. Is it ever valid to store to these?
860 return state->killBinding(*L);
861 }
862
SummarizeRegion(raw_ostream & os,ASTContext & Ctx,const MemRegion * MR)863 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
864 const MemRegion *MR) {
865 const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
866
867 switch (MR->getKind()) {
868 case MemRegion::FunctionTextRegionKind: {
869 const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
870 if (FD)
871 os << "the address of the function '" << *FD << '\'';
872 else
873 os << "the address of a function";
874 return true;
875 }
876 case MemRegion::BlockTextRegionKind:
877 os << "block text";
878 return true;
879 case MemRegion::BlockDataRegionKind:
880 os << "a block";
881 return true;
882 case MemRegion::CXXThisRegionKind:
883 case MemRegion::CXXTempObjectRegionKind:
884 os << "a C++ temp object of type " << TVR->getValueType().getAsString();
885 return true;
886 case MemRegion::VarRegionKind:
887 os << "a variable of type" << TVR->getValueType().getAsString();
888 return true;
889 case MemRegion::FieldRegionKind:
890 os << "a field of type " << TVR->getValueType().getAsString();
891 return true;
892 case MemRegion::ObjCIvarRegionKind:
893 os << "an instance variable of type " << TVR->getValueType().getAsString();
894 return true;
895 default:
896 return false;
897 }
898 }
899
900 //===----------------------------------------------------------------------===//
901 // evaluation of individual function calls.
902 //===----------------------------------------------------------------------===//
903
evalCopyCommon(CheckerContext & C,const CallExpr * CE,ProgramStateRef state,const Expr * Size,const Expr * Dest,const Expr * Source,bool Restricted,bool IsMempcpy) const904 void CStringChecker::evalCopyCommon(CheckerContext &C,
905 const CallExpr *CE,
906 ProgramStateRef state,
907 const Expr *Size, const Expr *Dest,
908 const Expr *Source, bool Restricted,
909 bool IsMempcpy) const {
910 CurrentFunctionDescription = "memory copy function";
911
912 // See if the size argument is zero.
913 const LocationContext *LCtx = C.getLocationContext();
914 SVal sizeVal = state->getSVal(Size, LCtx);
915 QualType sizeTy = Size->getType();
916
917 ProgramStateRef stateZeroSize, stateNonZeroSize;
918 std::tie(stateZeroSize, stateNonZeroSize) =
919 assumeZero(C, state, sizeVal, sizeTy);
920
921 // Get the value of the Dest.
922 SVal destVal = state->getSVal(Dest, LCtx);
923
924 // If the size is zero, there won't be any actual memory access, so
925 // just bind the return value to the destination buffer and return.
926 if (stateZeroSize && !stateNonZeroSize) {
927 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
928 C.addTransition(stateZeroSize);
929 return;
930 }
931
932 // If the size can be nonzero, we have to check the other arguments.
933 if (stateNonZeroSize) {
934 state = stateNonZeroSize;
935
936 // Ensure the destination is not null. If it is NULL there will be a
937 // NULL pointer dereference.
938 state = checkNonNull(C, state, Dest, destVal);
939 if (!state)
940 return;
941
942 // Get the value of the Src.
943 SVal srcVal = state->getSVal(Source, LCtx);
944
945 // Ensure the source is not null. If it is NULL there will be a
946 // NULL pointer dereference.
947 state = checkNonNull(C, state, Source, srcVal);
948 if (!state)
949 return;
950
951 // Ensure the accesses are valid and that the buffers do not overlap.
952 const char * const writeWarning =
953 "Memory copy function overflows destination buffer";
954 state = CheckBufferAccess(C, state, Size, Dest, Source,
955 writeWarning, /* sourceWarning = */ nullptr);
956 if (Restricted)
957 state = CheckOverlap(C, state, Size, Dest, Source);
958
959 if (!state)
960 return;
961
962 // If this is mempcpy, get the byte after the last byte copied and
963 // bind the expr.
964 if (IsMempcpy) {
965 loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
966
967 // Get the length to copy.
968 if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
969 // Get the byte after the last byte copied.
970 SValBuilder &SvalBuilder = C.getSValBuilder();
971 ASTContext &Ctx = SvalBuilder.getContext();
972 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
973 loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal,
974 CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>();
975 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
976 DestRegCharVal,
977 *lenValNonLoc,
978 Dest->getType());
979
980 // The byte after the last byte copied is the return value.
981 state = state->BindExpr(CE, LCtx, lastElement);
982 } else {
983 // If we don't know how much we copied, we can at least
984 // conjure a return value for later.
985 SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
986 C.blockCount());
987 state = state->BindExpr(CE, LCtx, result);
988 }
989
990 } else {
991 // All other copies return the destination buffer.
992 // (Well, bcopy() has a void return type, but this won't hurt.)
993 state = state->BindExpr(CE, LCtx, destVal);
994 }
995
996 // Invalidate the destination (regular invalidation without pointer-escaping
997 // the address of the top-level region).
998 // FIXME: Even if we can't perfectly model the copy, we should see if we
999 // can use LazyCompoundVals to copy the source values into the destination.
1000 // This would probably remove any existing bindings past the end of the
1001 // copied region, but that's still an improvement over blank invalidation.
1002 state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1003 /*IsSourceBuffer*/false);
1004
1005 // Invalidate the source (const-invalidation without const-pointer-escaping
1006 // the address of the top-level region).
1007 state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1008 /*IsSourceBuffer*/true);
1009
1010 C.addTransition(state);
1011 }
1012 }
1013
1014
evalMemcpy(CheckerContext & C,const CallExpr * CE) const1015 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1016 if (CE->getNumArgs() < 3)
1017 return;
1018
1019 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1020 // The return value is the address of the destination buffer.
1021 const Expr *Dest = CE->getArg(0);
1022 ProgramStateRef state = C.getState();
1023
1024 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1025 }
1026
evalMempcpy(CheckerContext & C,const CallExpr * CE) const1027 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1028 if (CE->getNumArgs() < 3)
1029 return;
1030
1031 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1032 // The return value is a pointer to the byte following the last written byte.
1033 const Expr *Dest = CE->getArg(0);
1034 ProgramStateRef state = C.getState();
1035
1036 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1037 }
1038
evalMemmove(CheckerContext & C,const CallExpr * CE) const1039 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1040 if (CE->getNumArgs() < 3)
1041 return;
1042
1043 // void *memmove(void *dst, const void *src, size_t n);
1044 // The return value is the address of the destination buffer.
1045 const Expr *Dest = CE->getArg(0);
1046 ProgramStateRef state = C.getState();
1047
1048 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1049 }
1050
evalBcopy(CheckerContext & C,const CallExpr * CE) const1051 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1052 if (CE->getNumArgs() < 3)
1053 return;
1054
1055 // void bcopy(const void *src, void *dst, size_t n);
1056 evalCopyCommon(C, CE, C.getState(),
1057 CE->getArg(2), CE->getArg(1), CE->getArg(0));
1058 }
1059
evalMemcmp(CheckerContext & C,const CallExpr * CE) const1060 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1061 if (CE->getNumArgs() < 3)
1062 return;
1063
1064 // int memcmp(const void *s1, const void *s2, size_t n);
1065 CurrentFunctionDescription = "memory comparison function";
1066
1067 const Expr *Left = CE->getArg(0);
1068 const Expr *Right = CE->getArg(1);
1069 const Expr *Size = CE->getArg(2);
1070
1071 ProgramStateRef state = C.getState();
1072 SValBuilder &svalBuilder = C.getSValBuilder();
1073
1074 // See if the size argument is zero.
1075 const LocationContext *LCtx = C.getLocationContext();
1076 SVal sizeVal = state->getSVal(Size, LCtx);
1077 QualType sizeTy = Size->getType();
1078
1079 ProgramStateRef stateZeroSize, stateNonZeroSize;
1080 std::tie(stateZeroSize, stateNonZeroSize) =
1081 assumeZero(C, state, sizeVal, sizeTy);
1082
1083 // If the size can be zero, the result will be 0 in that case, and we don't
1084 // have to check either of the buffers.
1085 if (stateZeroSize) {
1086 state = stateZeroSize;
1087 state = state->BindExpr(CE, LCtx,
1088 svalBuilder.makeZeroVal(CE->getType()));
1089 C.addTransition(state);
1090 }
1091
1092 // If the size can be nonzero, we have to check the other arguments.
1093 if (stateNonZeroSize) {
1094 state = stateNonZeroSize;
1095 // If we know the two buffers are the same, we know the result is 0.
1096 // First, get the two buffers' addresses. Another checker will have already
1097 // made sure they're not undefined.
1098 DefinedOrUnknownSVal LV =
1099 state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1100 DefinedOrUnknownSVal RV =
1101 state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1102
1103 // See if they are the same.
1104 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1105 ProgramStateRef StSameBuf, StNotSameBuf;
1106 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1107
1108 // If the two arguments might be the same buffer, we know the result is 0,
1109 // and we only need to check one size.
1110 if (StSameBuf) {
1111 state = StSameBuf;
1112 state = CheckBufferAccess(C, state, Size, Left);
1113 if (state) {
1114 state = StSameBuf->BindExpr(CE, LCtx,
1115 svalBuilder.makeZeroVal(CE->getType()));
1116 C.addTransition(state);
1117 }
1118 }
1119
1120 // If the two arguments might be different buffers, we have to check the
1121 // size of both of them.
1122 if (StNotSameBuf) {
1123 state = StNotSameBuf;
1124 state = CheckBufferAccess(C, state, Size, Left, Right);
1125 if (state) {
1126 // The return value is the comparison result, which we don't know.
1127 SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1128 C.blockCount());
1129 state = state->BindExpr(CE, LCtx, CmpV);
1130 C.addTransition(state);
1131 }
1132 }
1133 }
1134 }
1135
evalstrLength(CheckerContext & C,const CallExpr * CE) const1136 void CStringChecker::evalstrLength(CheckerContext &C,
1137 const CallExpr *CE) const {
1138 if (CE->getNumArgs() < 1)
1139 return;
1140
1141 // size_t strlen(const char *s);
1142 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1143 }
1144
evalstrnLength(CheckerContext & C,const CallExpr * CE) const1145 void CStringChecker::evalstrnLength(CheckerContext &C,
1146 const CallExpr *CE) const {
1147 if (CE->getNumArgs() < 2)
1148 return;
1149
1150 // size_t strnlen(const char *s, size_t maxlen);
1151 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1152 }
1153
evalstrLengthCommon(CheckerContext & C,const CallExpr * CE,bool IsStrnlen) const1154 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1155 bool IsStrnlen) const {
1156 CurrentFunctionDescription = "string length function";
1157 ProgramStateRef state = C.getState();
1158 const LocationContext *LCtx = C.getLocationContext();
1159
1160 if (IsStrnlen) {
1161 const Expr *maxlenExpr = CE->getArg(1);
1162 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1163
1164 ProgramStateRef stateZeroSize, stateNonZeroSize;
1165 std::tie(stateZeroSize, stateNonZeroSize) =
1166 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1167
1168 // If the size can be zero, the result will be 0 in that case, and we don't
1169 // have to check the string itself.
1170 if (stateZeroSize) {
1171 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1172 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1173 C.addTransition(stateZeroSize);
1174 }
1175
1176 // If the size is GUARANTEED to be zero, we're done!
1177 if (!stateNonZeroSize)
1178 return;
1179
1180 // Otherwise, record the assumption that the size is nonzero.
1181 state = stateNonZeroSize;
1182 }
1183
1184 // Check that the string argument is non-null.
1185 const Expr *Arg = CE->getArg(0);
1186 SVal ArgVal = state->getSVal(Arg, LCtx);
1187
1188 state = checkNonNull(C, state, Arg, ArgVal);
1189
1190 if (!state)
1191 return;
1192
1193 SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1194
1195 // If the argument isn't a valid C string, there's no valid state to
1196 // transition to.
1197 if (strLength.isUndef())
1198 return;
1199
1200 DefinedOrUnknownSVal result = UnknownVal();
1201
1202 // If the check is for strnlen() then bind the return value to no more than
1203 // the maxlen value.
1204 if (IsStrnlen) {
1205 QualType cmpTy = C.getSValBuilder().getConditionType();
1206
1207 // It's a little unfortunate to be getting this again,
1208 // but it's not that expensive...
1209 const Expr *maxlenExpr = CE->getArg(1);
1210 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1211
1212 Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1213 Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1214
1215 if (strLengthNL && maxlenValNL) {
1216 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1217
1218 // Check if the strLength is greater than the maxlen.
1219 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1220 C.getSValBuilder()
1221 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1222 .castAs<DefinedOrUnknownSVal>());
1223
1224 if (stateStringTooLong && !stateStringNotTooLong) {
1225 // If the string is longer than maxlen, return maxlen.
1226 result = *maxlenValNL;
1227 } else if (stateStringNotTooLong && !stateStringTooLong) {
1228 // If the string is shorter than maxlen, return its length.
1229 result = *strLengthNL;
1230 }
1231 }
1232
1233 if (result.isUnknown()) {
1234 // If we don't have enough information for a comparison, there's
1235 // no guarantee the full string length will actually be returned.
1236 // All we know is the return value is the min of the string length
1237 // and the limit. This is better than nothing.
1238 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1239 C.blockCount());
1240 NonLoc resultNL = result.castAs<NonLoc>();
1241
1242 if (strLengthNL) {
1243 state = state->assume(C.getSValBuilder().evalBinOpNN(
1244 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1245 .castAs<DefinedOrUnknownSVal>(), true);
1246 }
1247
1248 if (maxlenValNL) {
1249 state = state->assume(C.getSValBuilder().evalBinOpNN(
1250 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1251 .castAs<DefinedOrUnknownSVal>(), true);
1252 }
1253 }
1254
1255 } else {
1256 // This is a plain strlen(), not strnlen().
1257 result = strLength.castAs<DefinedOrUnknownSVal>();
1258
1259 // If we don't know the length of the string, conjure a return
1260 // value, so it can be used in constraints, at least.
1261 if (result.isUnknown()) {
1262 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1263 C.blockCount());
1264 }
1265 }
1266
1267 // Bind the return value.
1268 assert(!result.isUnknown() && "Should have conjured a value by now");
1269 state = state->BindExpr(CE, LCtx, result);
1270 C.addTransition(state);
1271 }
1272
evalStrcpy(CheckerContext & C,const CallExpr * CE) const1273 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1274 if (CE->getNumArgs() < 2)
1275 return;
1276
1277 // char *strcpy(char *restrict dst, const char *restrict src);
1278 evalStrcpyCommon(C, CE,
1279 /* returnEnd = */ false,
1280 /* isBounded = */ false,
1281 /* isAppending = */ false);
1282 }
1283
evalStrncpy(CheckerContext & C,const CallExpr * CE) const1284 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1285 if (CE->getNumArgs() < 3)
1286 return;
1287
1288 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1289 evalStrcpyCommon(C, CE,
1290 /* returnEnd = */ false,
1291 /* isBounded = */ true,
1292 /* isAppending = */ false);
1293 }
1294
evalStpcpy(CheckerContext & C,const CallExpr * CE) const1295 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1296 if (CE->getNumArgs() < 2)
1297 return;
1298
1299 // char *stpcpy(char *restrict dst, const char *restrict src);
1300 evalStrcpyCommon(C, CE,
1301 /* returnEnd = */ true,
1302 /* isBounded = */ false,
1303 /* isAppending = */ false);
1304 }
1305
evalStrcat(CheckerContext & C,const CallExpr * CE) const1306 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1307 if (CE->getNumArgs() < 2)
1308 return;
1309
1310 //char *strcat(char *restrict s1, const char *restrict s2);
1311 evalStrcpyCommon(C, CE,
1312 /* returnEnd = */ false,
1313 /* isBounded = */ false,
1314 /* isAppending = */ true);
1315 }
1316
evalStrncat(CheckerContext & C,const CallExpr * CE) const1317 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1318 if (CE->getNumArgs() < 3)
1319 return;
1320
1321 //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1322 evalStrcpyCommon(C, CE,
1323 /* returnEnd = */ false,
1324 /* isBounded = */ true,
1325 /* isAppending = */ true);
1326 }
1327
evalStrcpyCommon(CheckerContext & C,const CallExpr * CE,bool returnEnd,bool isBounded,bool isAppending) const1328 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1329 bool returnEnd, bool isBounded,
1330 bool isAppending) const {
1331 CurrentFunctionDescription = "string copy function";
1332 ProgramStateRef state = C.getState();
1333 const LocationContext *LCtx = C.getLocationContext();
1334
1335 // Check that the destination is non-null.
1336 const Expr *Dst = CE->getArg(0);
1337 SVal DstVal = state->getSVal(Dst, LCtx);
1338
1339 state = checkNonNull(C, state, Dst, DstVal);
1340 if (!state)
1341 return;
1342
1343 // Check that the source is non-null.
1344 const Expr *srcExpr = CE->getArg(1);
1345 SVal srcVal = state->getSVal(srcExpr, LCtx);
1346 state = checkNonNull(C, state, srcExpr, srcVal);
1347 if (!state)
1348 return;
1349
1350 // Get the string length of the source.
1351 SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1352
1353 // If the source isn't a valid C string, give up.
1354 if (strLength.isUndef())
1355 return;
1356
1357 SValBuilder &svalBuilder = C.getSValBuilder();
1358 QualType cmpTy = svalBuilder.getConditionType();
1359 QualType sizeTy = svalBuilder.getContext().getSizeType();
1360
1361 // These two values allow checking two kinds of errors:
1362 // - actual overflows caused by a source that doesn't fit in the destination
1363 // - potential overflows caused by a bound that could exceed the destination
1364 SVal amountCopied = UnknownVal();
1365 SVal maxLastElementIndex = UnknownVal();
1366 const char *boundWarning = nullptr;
1367
1368 // If the function is strncpy, strncat, etc... it is bounded.
1369 if (isBounded) {
1370 // Get the max number of characters to copy.
1371 const Expr *lenExpr = CE->getArg(2);
1372 SVal lenVal = state->getSVal(lenExpr, LCtx);
1373
1374 // Protect against misdeclared strncpy().
1375 lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1376
1377 Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1378 Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1379
1380 // If we know both values, we might be able to figure out how much
1381 // we're copying.
1382 if (strLengthNL && lenValNL) {
1383 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1384
1385 // Check if the max number to copy is less than the length of the src.
1386 // If the bound is equal to the source length, strncpy won't null-
1387 // terminate the result!
1388 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1389 svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1390 .castAs<DefinedOrUnknownSVal>());
1391
1392 if (stateSourceTooLong && !stateSourceNotTooLong) {
1393 // Max number to copy is less than the length of the src, so the actual
1394 // strLength copied is the max number arg.
1395 state = stateSourceTooLong;
1396 amountCopied = lenVal;
1397
1398 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1399 // The source buffer entirely fits in the bound.
1400 state = stateSourceNotTooLong;
1401 amountCopied = strLength;
1402 }
1403 }
1404
1405 // We still want to know if the bound is known to be too large.
1406 if (lenValNL) {
1407 if (isAppending) {
1408 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1409
1410 // Get the string length of the destination. If the destination is
1411 // memory that can't have a string length, we shouldn't be copying
1412 // into it anyway.
1413 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1414 if (dstStrLength.isUndef())
1415 return;
1416
1417 if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1418 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1419 *lenValNL,
1420 *dstStrLengthNL,
1421 sizeTy);
1422 boundWarning = "Size argument is greater than the free space in the "
1423 "destination buffer";
1424 }
1425
1426 } else {
1427 // For strncpy, this is just checking that lenVal <= sizeof(dst)
1428 // (Yes, strncpy and strncat differ in how they treat termination.
1429 // strncat ALWAYS terminates, but strncpy doesn't.)
1430
1431 // We need a special case for when the copy size is zero, in which
1432 // case strncpy will do no work at all. Our bounds check uses n-1
1433 // as the last element accessed, so n == 0 is problematic.
1434 ProgramStateRef StateZeroSize, StateNonZeroSize;
1435 std::tie(StateZeroSize, StateNonZeroSize) =
1436 assumeZero(C, state, *lenValNL, sizeTy);
1437
1438 // If the size is known to be zero, we're done.
1439 if (StateZeroSize && !StateNonZeroSize) {
1440 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1441 C.addTransition(StateZeroSize);
1442 return;
1443 }
1444
1445 // Otherwise, go ahead and figure out the last element we'll touch.
1446 // We don't record the non-zero assumption here because we can't
1447 // be sure. We won't warn on a possible zero.
1448 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1449 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1450 one, sizeTy);
1451 boundWarning = "Size argument is greater than the length of the "
1452 "destination buffer";
1453 }
1454 }
1455
1456 // If we couldn't pin down the copy length, at least bound it.
1457 // FIXME: We should actually run this code path for append as well, but
1458 // right now it creates problems with constraints (since we can end up
1459 // trying to pass constraints from symbol to symbol).
1460 if (amountCopied.isUnknown() && !isAppending) {
1461 // Try to get a "hypothetical" string length symbol, which we can later
1462 // set as a real value if that turns out to be the case.
1463 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1464 assert(!amountCopied.isUndef());
1465
1466 if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1467 if (lenValNL) {
1468 // amountCopied <= lenVal
1469 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1470 *amountCopiedNL,
1471 *lenValNL,
1472 cmpTy);
1473 state = state->assume(
1474 copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1475 if (!state)
1476 return;
1477 }
1478
1479 if (strLengthNL) {
1480 // amountCopied <= strlen(source)
1481 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1482 *amountCopiedNL,
1483 *strLengthNL,
1484 cmpTy);
1485 state = state->assume(
1486 copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1487 if (!state)
1488 return;
1489 }
1490 }
1491 }
1492
1493 } else {
1494 // The function isn't bounded. The amount copied should match the length
1495 // of the source buffer.
1496 amountCopied = strLength;
1497 }
1498
1499 assert(state);
1500
1501 // This represents the number of characters copied into the destination
1502 // buffer. (It may not actually be the strlen if the destination buffer
1503 // is not terminated.)
1504 SVal finalStrLength = UnknownVal();
1505
1506 // If this is an appending function (strcat, strncat...) then set the
1507 // string length to strlen(src) + strlen(dst) since the buffer will
1508 // ultimately contain both.
1509 if (isAppending) {
1510 // Get the string length of the destination. If the destination is memory
1511 // that can't have a string length, we shouldn't be copying into it anyway.
1512 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1513 if (dstStrLength.isUndef())
1514 return;
1515
1516 Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1517 Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1518
1519 // If we know both string lengths, we might know the final string length.
1520 if (srcStrLengthNL && dstStrLengthNL) {
1521 // Make sure the two lengths together don't overflow a size_t.
1522 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1523 if (!state)
1524 return;
1525
1526 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1527 *dstStrLengthNL, sizeTy);
1528 }
1529
1530 // If we couldn't get a single value for the final string length,
1531 // we can at least bound it by the individual lengths.
1532 if (finalStrLength.isUnknown()) {
1533 // Try to get a "hypothetical" string length symbol, which we can later
1534 // set as a real value if that turns out to be the case.
1535 finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1536 assert(!finalStrLength.isUndef());
1537
1538 if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1539 if (srcStrLengthNL) {
1540 // finalStrLength >= srcStrLength
1541 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1542 *finalStrLengthNL,
1543 *srcStrLengthNL,
1544 cmpTy);
1545 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1546 true);
1547 if (!state)
1548 return;
1549 }
1550
1551 if (dstStrLengthNL) {
1552 // finalStrLength >= dstStrLength
1553 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1554 *finalStrLengthNL,
1555 *dstStrLengthNL,
1556 cmpTy);
1557 state =
1558 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1559 if (!state)
1560 return;
1561 }
1562 }
1563 }
1564
1565 } else {
1566 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1567 // the final string length will match the input string length.
1568 finalStrLength = amountCopied;
1569 }
1570
1571 // The final result of the function will either be a pointer past the last
1572 // copied element, or a pointer to the start of the destination buffer.
1573 SVal Result = (returnEnd ? UnknownVal() : DstVal);
1574
1575 assert(state);
1576
1577 // If the destination is a MemRegion, try to check for a buffer overflow and
1578 // record the new string length.
1579 if (Optional<loc::MemRegionVal> dstRegVal =
1580 DstVal.getAs<loc::MemRegionVal>()) {
1581 QualType ptrTy = Dst->getType();
1582
1583 // If we have an exact value on a bounded copy, use that to check for
1584 // overflows, rather than our estimate about how much is actually copied.
1585 if (boundWarning) {
1586 if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1587 SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1588 *maxLastNL, ptrTy);
1589 state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1590 boundWarning);
1591 if (!state)
1592 return;
1593 }
1594 }
1595
1596 // Then, if the final length is known...
1597 if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1598 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1599 *knownStrLength, ptrTy);
1600
1601 // ...and we haven't checked the bound, we'll check the actual copy.
1602 if (!boundWarning) {
1603 const char * const warningMsg =
1604 "String copy function overflows destination buffer";
1605 state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1606 if (!state)
1607 return;
1608 }
1609
1610 // If this is a stpcpy-style copy, the last element is the return value.
1611 if (returnEnd)
1612 Result = lastElement;
1613 }
1614
1615 // Invalidate the destination (regular invalidation without pointer-escaping
1616 // the address of the top-level region). This must happen before we set the
1617 // C string length because invalidation will clear the length.
1618 // FIXME: Even if we can't perfectly model the copy, we should see if we
1619 // can use LazyCompoundVals to copy the source values into the destination.
1620 // This would probably remove any existing bindings past the end of the
1621 // string, but that's still an improvement over blank invalidation.
1622 state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1623 /*IsSourceBuffer*/false);
1624
1625 // Invalidate the source (const-invalidation without const-pointer-escaping
1626 // the address of the top-level region).
1627 state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);
1628
1629 // Set the C string length of the destination, if we know it.
1630 if (isBounded && !isAppending) {
1631 // strncpy is annoying in that it doesn't guarantee to null-terminate
1632 // the result string. If the original string didn't fit entirely inside
1633 // the bound (including the null-terminator), we don't know how long the
1634 // result is.
1635 if (amountCopied != strLength)
1636 finalStrLength = UnknownVal();
1637 }
1638 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1639 }
1640
1641 assert(state);
1642
1643 // If this is a stpcpy-style copy, but we were unable to check for a buffer
1644 // overflow, we still need a result. Conjure a return value.
1645 if (returnEnd && Result.isUnknown()) {
1646 Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1647 }
1648
1649 // Set the return value.
1650 state = state->BindExpr(CE, LCtx, Result);
1651 C.addTransition(state);
1652 }
1653
evalStrcmp(CheckerContext & C,const CallExpr * CE) const1654 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1655 if (CE->getNumArgs() < 2)
1656 return;
1657
1658 //int strcmp(const char *s1, const char *s2);
1659 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1660 }
1661
evalStrncmp(CheckerContext & C,const CallExpr * CE) const1662 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1663 if (CE->getNumArgs() < 3)
1664 return;
1665
1666 //int strncmp(const char *s1, const char *s2, size_t n);
1667 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1668 }
1669
evalStrcasecmp(CheckerContext & C,const CallExpr * CE) const1670 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1671 const CallExpr *CE) const {
1672 if (CE->getNumArgs() < 2)
1673 return;
1674
1675 //int strcasecmp(const char *s1, const char *s2);
1676 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1677 }
1678
evalStrncasecmp(CheckerContext & C,const CallExpr * CE) const1679 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1680 const CallExpr *CE) const {
1681 if (CE->getNumArgs() < 3)
1682 return;
1683
1684 //int strncasecmp(const char *s1, const char *s2, size_t n);
1685 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1686 }
1687
evalStrcmpCommon(CheckerContext & C,const CallExpr * CE,bool isBounded,bool ignoreCase) const1688 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1689 bool isBounded, bool ignoreCase) const {
1690 CurrentFunctionDescription = "string comparison function";
1691 ProgramStateRef state = C.getState();
1692 const LocationContext *LCtx = C.getLocationContext();
1693
1694 // Check that the first string is non-null
1695 const Expr *s1 = CE->getArg(0);
1696 SVal s1Val = state->getSVal(s1, LCtx);
1697 state = checkNonNull(C, state, s1, s1Val);
1698 if (!state)
1699 return;
1700
1701 // Check that the second string is non-null.
1702 const Expr *s2 = CE->getArg(1);
1703 SVal s2Val = state->getSVal(s2, LCtx);
1704 state = checkNonNull(C, state, s2, s2Val);
1705 if (!state)
1706 return;
1707
1708 // Get the string length of the first string or give up.
1709 SVal s1Length = getCStringLength(C, state, s1, s1Val);
1710 if (s1Length.isUndef())
1711 return;
1712
1713 // Get the string length of the second string or give up.
1714 SVal s2Length = getCStringLength(C, state, s2, s2Val);
1715 if (s2Length.isUndef())
1716 return;
1717
1718 // If we know the two buffers are the same, we know the result is 0.
1719 // First, get the two buffers' addresses. Another checker will have already
1720 // made sure they're not undefined.
1721 DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1722 DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1723
1724 // See if they are the same.
1725 SValBuilder &svalBuilder = C.getSValBuilder();
1726 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1727 ProgramStateRef StSameBuf, StNotSameBuf;
1728 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1729
1730 // If the two arguments might be the same buffer, we know the result is 0,
1731 // and we only need to check one size.
1732 if (StSameBuf) {
1733 StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1734 svalBuilder.makeZeroVal(CE->getType()));
1735 C.addTransition(StSameBuf);
1736
1737 // If the two arguments are GUARANTEED to be the same, we're done!
1738 if (!StNotSameBuf)
1739 return;
1740 }
1741
1742 assert(StNotSameBuf);
1743 state = StNotSameBuf;
1744
1745 // At this point we can go about comparing the two buffers.
1746 // For now, we only do this if they're both known string literals.
1747
1748 // Attempt to extract string literals from both expressions.
1749 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1750 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1751 bool canComputeResult = false;
1752
1753 if (s1StrLiteral && s2StrLiteral) {
1754 StringRef s1StrRef = s1StrLiteral->getString();
1755 StringRef s2StrRef = s2StrLiteral->getString();
1756
1757 if (isBounded) {
1758 // Get the max number of characters to compare.
1759 const Expr *lenExpr = CE->getArg(2);
1760 SVal lenVal = state->getSVal(lenExpr, LCtx);
1761
1762 // If the length is known, we can get the right substrings.
1763 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1764 // Create substrings of each to compare the prefix.
1765 s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1766 s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1767 canComputeResult = true;
1768 }
1769 } else {
1770 // This is a normal, unbounded strcmp.
1771 canComputeResult = true;
1772 }
1773
1774 if (canComputeResult) {
1775 // Real strcmp stops at null characters.
1776 size_t s1Term = s1StrRef.find('\0');
1777 if (s1Term != StringRef::npos)
1778 s1StrRef = s1StrRef.substr(0, s1Term);
1779
1780 size_t s2Term = s2StrRef.find('\0');
1781 if (s2Term != StringRef::npos)
1782 s2StrRef = s2StrRef.substr(0, s2Term);
1783
1784 // Use StringRef's comparison methods to compute the actual result.
1785 int result;
1786
1787 if (ignoreCase) {
1788 // Compare string 1 to string 2 the same way strcasecmp() does.
1789 result = s1StrRef.compare_lower(s2StrRef);
1790 } else {
1791 // Compare string 1 to string 2 the same way strcmp() does.
1792 result = s1StrRef.compare(s2StrRef);
1793 }
1794
1795 // Build the SVal of the comparison and bind the return value.
1796 SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1797 state = state->BindExpr(CE, LCtx, resultVal);
1798 }
1799 }
1800
1801 if (!canComputeResult) {
1802 // Conjure a symbolic value. It's the best we can do.
1803 SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1804 C.blockCount());
1805 state = state->BindExpr(CE, LCtx, resultVal);
1806 }
1807
1808 // Record this as a possible path.
1809 C.addTransition(state);
1810 }
1811
evalStrsep(CheckerContext & C,const CallExpr * CE) const1812 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1813 //char *strsep(char **stringp, const char *delim);
1814 if (CE->getNumArgs() < 2)
1815 return;
1816
1817 // Sanity: does the search string parameter match the return type?
1818 const Expr *SearchStrPtr = CE->getArg(0);
1819 QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1820 if (CharPtrTy.isNull() ||
1821 CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1822 return;
1823
1824 CurrentFunctionDescription = "strsep()";
1825 ProgramStateRef State = C.getState();
1826 const LocationContext *LCtx = C.getLocationContext();
1827
1828 // Check that the search string pointer is non-null (though it may point to
1829 // a null string).
1830 SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1831 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1832 if (!State)
1833 return;
1834
1835 // Check that the delimiter string is non-null.
1836 const Expr *DelimStr = CE->getArg(1);
1837 SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1838 State = checkNonNull(C, State, DelimStr, DelimStrVal);
1839 if (!State)
1840 return;
1841
1842 SValBuilder &SVB = C.getSValBuilder();
1843 SVal Result;
1844 if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1845 // Get the current value of the search string pointer, as a char*.
1846 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1847
1848 // Invalidate the search string, representing the change of one delimiter
1849 // character to NUL.
1850 State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1851 /*IsSourceBuffer*/false);
1852
1853 // Overwrite the search string pointer. The new value is either an address
1854 // further along in the same string, or NULL if there are no more tokens.
1855 State = State->bindLoc(*SearchStrLoc,
1856 SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1857 C.blockCount()));
1858 } else {
1859 assert(SearchStrVal.isUnknown());
1860 // Conjure a symbolic value. It's the best we can do.
1861 Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1862 }
1863
1864 // Set the return value, and finish.
1865 State = State->BindExpr(CE, LCtx, Result);
1866 C.addTransition(State);
1867 }
1868
1869
1870 //===----------------------------------------------------------------------===//
1871 // The driver method, and other Checker callbacks.
1872 //===----------------------------------------------------------------------===//
1873
evalCall(const CallExpr * CE,CheckerContext & C) const1874 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1875 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1876
1877 if (!FDecl)
1878 return false;
1879
1880 // FIXME: Poorly-factored string switches are slow.
1881 FnCheck evalFunction = nullptr;
1882 if (C.isCLibraryFunction(FDecl, "memcpy"))
1883 evalFunction = &CStringChecker::evalMemcpy;
1884 else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1885 evalFunction = &CStringChecker::evalMempcpy;
1886 else if (C.isCLibraryFunction(FDecl, "memcmp"))
1887 evalFunction = &CStringChecker::evalMemcmp;
1888 else if (C.isCLibraryFunction(FDecl, "memmove"))
1889 evalFunction = &CStringChecker::evalMemmove;
1890 else if (C.isCLibraryFunction(FDecl, "strcpy"))
1891 evalFunction = &CStringChecker::evalStrcpy;
1892 else if (C.isCLibraryFunction(FDecl, "strncpy"))
1893 evalFunction = &CStringChecker::evalStrncpy;
1894 else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1895 evalFunction = &CStringChecker::evalStpcpy;
1896 else if (C.isCLibraryFunction(FDecl, "strcat"))
1897 evalFunction = &CStringChecker::evalStrcat;
1898 else if (C.isCLibraryFunction(FDecl, "strncat"))
1899 evalFunction = &CStringChecker::evalStrncat;
1900 else if (C.isCLibraryFunction(FDecl, "strlen"))
1901 evalFunction = &CStringChecker::evalstrLength;
1902 else if (C.isCLibraryFunction(FDecl, "strnlen"))
1903 evalFunction = &CStringChecker::evalstrnLength;
1904 else if (C.isCLibraryFunction(FDecl, "strcmp"))
1905 evalFunction = &CStringChecker::evalStrcmp;
1906 else if (C.isCLibraryFunction(FDecl, "strncmp"))
1907 evalFunction = &CStringChecker::evalStrncmp;
1908 else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1909 evalFunction = &CStringChecker::evalStrcasecmp;
1910 else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1911 evalFunction = &CStringChecker::evalStrncasecmp;
1912 else if (C.isCLibraryFunction(FDecl, "strsep"))
1913 evalFunction = &CStringChecker::evalStrsep;
1914 else if (C.isCLibraryFunction(FDecl, "bcopy"))
1915 evalFunction = &CStringChecker::evalBcopy;
1916 else if (C.isCLibraryFunction(FDecl, "bcmp"))
1917 evalFunction = &CStringChecker::evalMemcmp;
1918
1919 // If the callee isn't a string function, let another checker handle it.
1920 if (!evalFunction)
1921 return false;
1922
1923 // Check and evaluate the call.
1924 (this->*evalFunction)(C, CE);
1925
1926 // If the evaluate call resulted in no change, chain to the next eval call
1927 // handler.
1928 // Note, the custom CString evaluation calls assume that basic safety
1929 // properties are held. However, if the user chooses to turn off some of these
1930 // checks, we ignore the issues and leave the call evaluation to a generic
1931 // handler.
1932 if (!C.isDifferent())
1933 return false;
1934
1935 return true;
1936 }
1937
checkPreStmt(const DeclStmt * DS,CheckerContext & C) const1938 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1939 // Record string length for char a[] = "abc";
1940 ProgramStateRef state = C.getState();
1941
1942 for (const auto *I : DS->decls()) {
1943 const VarDecl *D = dyn_cast<VarDecl>(I);
1944 if (!D)
1945 continue;
1946
1947 // FIXME: Handle array fields of structs.
1948 if (!D->getType()->isArrayType())
1949 continue;
1950
1951 const Expr *Init = D->getInit();
1952 if (!Init)
1953 continue;
1954 if (!isa<StringLiteral>(Init))
1955 continue;
1956
1957 Loc VarLoc = state->getLValue(D, C.getLocationContext());
1958 const MemRegion *MR = VarLoc.getAsRegion();
1959 if (!MR)
1960 continue;
1961
1962 SVal StrVal = state->getSVal(Init, C.getLocationContext());
1963 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1964 DefinedOrUnknownSVal strLength =
1965 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
1966
1967 state = state->set<CStringLength>(MR, strLength);
1968 }
1969
1970 C.addTransition(state);
1971 }
1972
wantsRegionChangeUpdate(ProgramStateRef state) const1973 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1974 CStringLengthTy Entries = state->get<CStringLength>();
1975 return !Entries.isEmpty();
1976 }
1977
1978 ProgramStateRef
checkRegionChanges(ProgramStateRef state,const InvalidatedSymbols *,ArrayRef<const MemRegion * > ExplicitRegions,ArrayRef<const MemRegion * > Regions,const CallEvent * Call) const1979 CStringChecker::checkRegionChanges(ProgramStateRef state,
1980 const InvalidatedSymbols *,
1981 ArrayRef<const MemRegion *> ExplicitRegions,
1982 ArrayRef<const MemRegion *> Regions,
1983 const CallEvent *Call) const {
1984 CStringLengthTy Entries = state->get<CStringLength>();
1985 if (Entries.isEmpty())
1986 return state;
1987
1988 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1989 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1990
1991 // First build sets for the changed regions and their super-regions.
1992 for (ArrayRef<const MemRegion *>::iterator
1993 I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1994 const MemRegion *MR = *I;
1995 Invalidated.insert(MR);
1996
1997 SuperRegions.insert(MR);
1998 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1999 MR = SR->getSuperRegion();
2000 SuperRegions.insert(MR);
2001 }
2002 }
2003
2004 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2005
2006 // Then loop over the entries in the current state.
2007 for (CStringLengthTy::iterator I = Entries.begin(),
2008 E = Entries.end(); I != E; ++I) {
2009 const MemRegion *MR = I.getKey();
2010
2011 // Is this entry for a super-region of a changed region?
2012 if (SuperRegions.count(MR)) {
2013 Entries = F.remove(Entries, MR);
2014 continue;
2015 }
2016
2017 // Is this entry for a sub-region of a changed region?
2018 const MemRegion *Super = MR;
2019 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2020 Super = SR->getSuperRegion();
2021 if (Invalidated.count(Super)) {
2022 Entries = F.remove(Entries, MR);
2023 break;
2024 }
2025 }
2026 }
2027
2028 return state->set<CStringLength>(Entries);
2029 }
2030
checkLiveSymbols(ProgramStateRef state,SymbolReaper & SR) const2031 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2032 SymbolReaper &SR) const {
2033 // Mark all symbols in our string length map as valid.
2034 CStringLengthTy Entries = state->get<CStringLength>();
2035
2036 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2037 I != E; ++I) {
2038 SVal Len = I.getData();
2039
2040 for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2041 se = Len.symbol_end(); si != se; ++si)
2042 SR.markInUse(*si);
2043 }
2044 }
2045
checkDeadSymbols(SymbolReaper & SR,CheckerContext & C) const2046 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2047 CheckerContext &C) const {
2048 if (!SR.hasDeadSymbols())
2049 return;
2050
2051 ProgramStateRef state = C.getState();
2052 CStringLengthTy Entries = state->get<CStringLength>();
2053 if (Entries.isEmpty())
2054 return;
2055
2056 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2057 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2058 I != E; ++I) {
2059 SVal Len = I.getData();
2060 if (SymbolRef Sym = Len.getAsSymbol()) {
2061 if (SR.isDead(Sym))
2062 Entries = F.remove(Entries, I.getKey());
2063 }
2064 }
2065
2066 state = state->set<CStringLength>(Entries);
2067 C.addTransition(state);
2068 }
2069
2070 #define REGISTER_CHECKER(name) \
2071 void ento::register##name(CheckerManager &mgr) { \
2072 CStringChecker *checker = mgr.registerChecker<CStringChecker>(); \
2073 checker->Filter.Check##name = true; \
2074 checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \
2075 }
2076
2077 REGISTER_CHECKER(CStringNullArg)
REGISTER_CHECKER(CStringOutOfBounds)2078 REGISTER_CHECKER(CStringOutOfBounds)
2079 REGISTER_CHECKER(CStringBufferOverlap)
2080 REGISTER_CHECKER(CStringNotNullTerm)
2081
2082 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2083 registerCStringNullArg(Mgr);
2084 }
2085