1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "ContinuationIndenter.h"
17 #include "TokenAnnotator.h"
18 #include "UnwrappedLineFormatter.h"
19 #include "UnwrappedLineParser.h"
20 #include "WhitespaceManager.h"
21 #include "clang/Basic/Diagnostic.h"
22 #include "clang/Basic/DiagnosticOptions.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Regex.h"
31 #include "llvm/Support/YAMLTraits.h"
32 #include <queue>
33 #include <string>
34
35 #define DEBUG_TYPE "format-formatter"
36
37 using clang::format::FormatStyle;
38
39 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
40
41 namespace llvm {
42 namespace yaml {
43 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
enumerationllvm::yaml::ScalarEnumerationTraits44 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
45 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
46 IO.enumCase(Value, "Java", FormatStyle::LK_Java);
47 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
48 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
49 }
50 };
51
52 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
enumerationllvm::yaml::ScalarEnumerationTraits53 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
54 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
55 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
56 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
57 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
58 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
59 }
60 };
61
62 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits63 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
64 IO.enumCase(Value, "Never", FormatStyle::UT_Never);
65 IO.enumCase(Value, "false", FormatStyle::UT_Never);
66 IO.enumCase(Value, "Always", FormatStyle::UT_Always);
67 IO.enumCase(Value, "true", FormatStyle::UT_Always);
68 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
69 }
70 };
71
72 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits73 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
74 IO.enumCase(Value, "None", FormatStyle::SFS_None);
75 IO.enumCase(Value, "false", FormatStyle::SFS_None);
76 IO.enumCase(Value, "All", FormatStyle::SFS_All);
77 IO.enumCase(Value, "true", FormatStyle::SFS_All);
78 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
79 IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
80 }
81 };
82
83 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits84 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
85 IO.enumCase(Value, "All", FormatStyle::BOS_All);
86 IO.enumCase(Value, "true", FormatStyle::BOS_All);
87 IO.enumCase(Value, "None", FormatStyle::BOS_None);
88 IO.enumCase(Value, "false", FormatStyle::BOS_None);
89 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
90 }
91 };
92
93 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits94 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
95 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
96 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
97 IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
98 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
99 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
100 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
101 }
102 };
103
104 template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits105 static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
106 IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
107 IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
108 IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
109
110 // For backward compatibility.
111 IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
112 IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
113 }
114 };
115
116 template <>
117 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
enumerationllvm::yaml::ScalarEnumerationTraits118 static void enumeration(IO &IO,
119 FormatStyle::NamespaceIndentationKind &Value) {
120 IO.enumCase(Value, "None", FormatStyle::NI_None);
121 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
122 IO.enumCase(Value, "All", FormatStyle::NI_All);
123 }
124 };
125
126 template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
enumerationllvm::yaml::ScalarEnumerationTraits127 static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
128 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
129 IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
130 IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
131
132 // For backward compatibility.
133 IO.enumCase(Value, "true", FormatStyle::PAS_Left);
134 IO.enumCase(Value, "false", FormatStyle::PAS_Right);
135 }
136 };
137
138 template <>
139 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
enumerationllvm::yaml::ScalarEnumerationTraits140 static void enumeration(IO &IO,
141 FormatStyle::SpaceBeforeParensOptions &Value) {
142 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
143 IO.enumCase(Value, "ControlStatements",
144 FormatStyle::SBPO_ControlStatements);
145 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
146
147 // For backward compatibility.
148 IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
149 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
150 }
151 };
152
153 template <> struct MappingTraits<FormatStyle> {
mappingllvm::yaml::MappingTraits154 static void mapping(IO &IO, FormatStyle &Style) {
155 // When reading, read the language first, we need it for getPredefinedStyle.
156 IO.mapOptional("Language", Style.Language);
157
158 if (IO.outputting()) {
159 StringRef StylesArray[] = {"LLVM", "Google", "Chromium",
160 "Mozilla", "WebKit", "GNU"};
161 ArrayRef<StringRef> Styles(StylesArray);
162 for (size_t i = 0, e = Styles.size(); i < e; ++i) {
163 StringRef StyleName(Styles[i]);
164 FormatStyle PredefinedStyle;
165 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
166 Style == PredefinedStyle) {
167 IO.mapOptional("# BasedOnStyle", StyleName);
168 break;
169 }
170 }
171 } else {
172 StringRef BasedOnStyle;
173 IO.mapOptional("BasedOnStyle", BasedOnStyle);
174 if (!BasedOnStyle.empty()) {
175 FormatStyle::LanguageKind OldLanguage = Style.Language;
176 FormatStyle::LanguageKind Language =
177 ((FormatStyle *)IO.getContext())->Language;
178 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
179 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
180 return;
181 }
182 Style.Language = OldLanguage;
183 }
184 }
185
186 // For backward compatibility.
187 if (!IO.outputting()) {
188 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
189 IO.mapOptional("IndentFunctionDeclarationAfterType",
190 Style.IndentWrappedFunctionNames);
191 IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
192 IO.mapOptional("SpaceAfterControlStatementKeyword",
193 Style.SpaceBeforeParens);
194 }
195
196 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
197 IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
198 IO.mapOptional("AlignConsecutiveAssignments",
199 Style.AlignConsecutiveAssignments);
200 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
201 IO.mapOptional("AlignOperands", Style.AlignOperands);
202 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
203 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
204 Style.AllowAllParametersOfDeclarationOnNextLine);
205 IO.mapOptional("AllowShortBlocksOnASingleLine",
206 Style.AllowShortBlocksOnASingleLine);
207 IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
208 Style.AllowShortCaseLabelsOnASingleLine);
209 IO.mapOptional("AllowShortFunctionsOnASingleLine",
210 Style.AllowShortFunctionsOnASingleLine);
211 IO.mapOptional("AllowShortIfStatementsOnASingleLine",
212 Style.AllowShortIfStatementsOnASingleLine);
213 IO.mapOptional("AllowShortLoopsOnASingleLine",
214 Style.AllowShortLoopsOnASingleLine);
215 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
216 Style.AlwaysBreakAfterDefinitionReturnType);
217 IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
218 Style.AlwaysBreakBeforeMultilineStrings);
219 IO.mapOptional("AlwaysBreakTemplateDeclarations",
220 Style.AlwaysBreakTemplateDeclarations);
221 IO.mapOptional("BinPackArguments", Style.BinPackArguments);
222 IO.mapOptional("BinPackParameters", Style.BinPackParameters);
223 IO.mapOptional("BreakBeforeBinaryOperators",
224 Style.BreakBeforeBinaryOperators);
225 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
226 IO.mapOptional("BreakBeforeTernaryOperators",
227 Style.BreakBeforeTernaryOperators);
228 IO.mapOptional("BreakConstructorInitializersBeforeComma",
229 Style.BreakConstructorInitializersBeforeComma);
230 IO.mapOptional("ColumnLimit", Style.ColumnLimit);
231 IO.mapOptional("CommentPragmas", Style.CommentPragmas);
232 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
233 Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
234 IO.mapOptional("ConstructorInitializerIndentWidth",
235 Style.ConstructorInitializerIndentWidth);
236 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
237 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
238 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
239 IO.mapOptional("DisableFormat", Style.DisableFormat);
240 IO.mapOptional("ExperimentalAutoDetectBinPacking",
241 Style.ExperimentalAutoDetectBinPacking);
242 IO.mapOptional("ForEachMacros", Style.ForEachMacros);
243 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
244 IO.mapOptional("IndentWidth", Style.IndentWidth);
245 IO.mapOptional("IndentWrappedFunctionNames",
246 Style.IndentWrappedFunctionNames);
247 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
248 Style.KeepEmptyLinesAtTheStartOfBlocks);
249 IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
250 IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
251 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
252 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
253 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
254 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
255 IO.mapOptional("ObjCSpaceBeforeProtocolList",
256 Style.ObjCSpaceBeforeProtocolList);
257 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
258 Style.PenaltyBreakBeforeFirstCallParameter);
259 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
260 IO.mapOptional("PenaltyBreakFirstLessLess",
261 Style.PenaltyBreakFirstLessLess);
262 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
263 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
264 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
265 Style.PenaltyReturnTypeOnItsOwnLine);
266 IO.mapOptional("PointerAlignment", Style.PointerAlignment);
267 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
268 IO.mapOptional("SpaceBeforeAssignmentOperators",
269 Style.SpaceBeforeAssignmentOperators);
270 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
271 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
272 IO.mapOptional("SpacesBeforeTrailingComments",
273 Style.SpacesBeforeTrailingComments);
274 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
275 IO.mapOptional("SpacesInContainerLiterals",
276 Style.SpacesInContainerLiterals);
277 IO.mapOptional("SpacesInCStyleCastParentheses",
278 Style.SpacesInCStyleCastParentheses);
279 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
280 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
281 IO.mapOptional("Standard", Style.Standard);
282 IO.mapOptional("TabWidth", Style.TabWidth);
283 IO.mapOptional("UseTab", Style.UseTab);
284 }
285 };
286
287 // Allows to read vector<FormatStyle> while keeping default values.
288 // IO.getContext() should contain a pointer to the FormatStyle structure, that
289 // will be used to get default values for missing keys.
290 // If the first element has no Language specified, it will be treated as the
291 // default one for the following elements.
292 template <> struct DocumentListTraits<std::vector<FormatStyle>> {
sizellvm::yaml::DocumentListTraits293 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
294 return Seq.size();
295 }
elementllvm::yaml::DocumentListTraits296 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
297 size_t Index) {
298 if (Index >= Seq.size()) {
299 assert(Index == Seq.size());
300 FormatStyle Template;
301 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
302 Template = Seq[0];
303 } else {
304 Template = *((const FormatStyle *)IO.getContext());
305 Template.Language = FormatStyle::LK_None;
306 }
307 Seq.resize(Index + 1, Template);
308 }
309 return Seq[Index];
310 }
311 };
312 }
313 }
314
315 namespace clang {
316 namespace format {
317
getParseCategory()318 const std::error_category &getParseCategory() {
319 static ParseErrorCategory C;
320 return C;
321 }
make_error_code(ParseError e)322 std::error_code make_error_code(ParseError e) {
323 return std::error_code(static_cast<int>(e), getParseCategory());
324 }
325
name() const326 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
327 return "clang-format.parse_error";
328 }
329
message(int EV) const330 std::string ParseErrorCategory::message(int EV) const {
331 switch (static_cast<ParseError>(EV)) {
332 case ParseError::Success:
333 return "Success";
334 case ParseError::Error:
335 return "Invalid argument";
336 case ParseError::Unsuitable:
337 return "Unsuitable";
338 }
339 llvm_unreachable("unexpected parse error");
340 }
341
getLLVMStyle()342 FormatStyle getLLVMStyle() {
343 FormatStyle LLVMStyle;
344 LLVMStyle.Language = FormatStyle::LK_Cpp;
345 LLVMStyle.AccessModifierOffset = -2;
346 LLVMStyle.AlignEscapedNewlinesLeft = false;
347 LLVMStyle.AlignAfterOpenBracket = true;
348 LLVMStyle.AlignOperands = true;
349 LLVMStyle.AlignTrailingComments = true;
350 LLVMStyle.AlignConsecutiveAssignments = false;
351 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
352 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
353 LLVMStyle.AllowShortBlocksOnASingleLine = false;
354 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
355 LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
356 LLVMStyle.AllowShortLoopsOnASingleLine = false;
357 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
358 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
359 LLVMStyle.AlwaysBreakTemplateDeclarations = false;
360 LLVMStyle.BinPackParameters = true;
361 LLVMStyle.BinPackArguments = true;
362 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
363 LLVMStyle.BreakBeforeTernaryOperators = true;
364 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
365 LLVMStyle.BreakConstructorInitializersBeforeComma = false;
366 LLVMStyle.ColumnLimit = 80;
367 LLVMStyle.CommentPragmas = "^ IWYU pragma:";
368 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
369 LLVMStyle.ConstructorInitializerIndentWidth = 4;
370 LLVMStyle.ContinuationIndentWidth = 4;
371 LLVMStyle.Cpp11BracedListStyle = true;
372 LLVMStyle.DerivePointerAlignment = false;
373 LLVMStyle.ExperimentalAutoDetectBinPacking = false;
374 LLVMStyle.ForEachMacros.push_back("foreach");
375 LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
376 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
377 LLVMStyle.IndentCaseLabels = false;
378 LLVMStyle.IndentWrappedFunctionNames = false;
379 LLVMStyle.IndentWidth = 2;
380 LLVMStyle.TabWidth = 8;
381 LLVMStyle.MaxEmptyLinesToKeep = 1;
382 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
383 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
384 LLVMStyle.ObjCBlockIndentWidth = 2;
385 LLVMStyle.ObjCSpaceAfterProperty = false;
386 LLVMStyle.ObjCSpaceBeforeProtocolList = true;
387 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
388 LLVMStyle.SpacesBeforeTrailingComments = 1;
389 LLVMStyle.Standard = FormatStyle::LS_Cpp11;
390 LLVMStyle.UseTab = FormatStyle::UT_Never;
391 LLVMStyle.SpacesInParentheses = false;
392 LLVMStyle.SpacesInSquareBrackets = false;
393 LLVMStyle.SpaceInEmptyParentheses = false;
394 LLVMStyle.SpacesInContainerLiterals = true;
395 LLVMStyle.SpacesInCStyleCastParentheses = false;
396 LLVMStyle.SpaceAfterCStyleCast = false;
397 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
398 LLVMStyle.SpaceBeforeAssignmentOperators = true;
399 LLVMStyle.SpacesInAngles = false;
400
401 LLVMStyle.PenaltyBreakComment = 300;
402 LLVMStyle.PenaltyBreakFirstLessLess = 120;
403 LLVMStyle.PenaltyBreakString = 1000;
404 LLVMStyle.PenaltyExcessCharacter = 1000000;
405 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
406 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
407
408 LLVMStyle.DisableFormat = false;
409
410 return LLVMStyle;
411 }
412
getGoogleStyle(FormatStyle::LanguageKind Language)413 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
414 FormatStyle GoogleStyle = getLLVMStyle();
415 GoogleStyle.Language = Language;
416
417 GoogleStyle.AccessModifierOffset = -1;
418 GoogleStyle.AlignEscapedNewlinesLeft = true;
419 GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
420 GoogleStyle.AllowShortLoopsOnASingleLine = true;
421 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
422 GoogleStyle.AlwaysBreakTemplateDeclarations = true;
423 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
424 GoogleStyle.DerivePointerAlignment = true;
425 GoogleStyle.IndentCaseLabels = true;
426 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
427 GoogleStyle.ObjCSpaceAfterProperty = false;
428 GoogleStyle.ObjCSpaceBeforeProtocolList = false;
429 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
430 GoogleStyle.SpacesBeforeTrailingComments = 2;
431 GoogleStyle.Standard = FormatStyle::LS_Auto;
432
433 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
434 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
435
436 if (Language == FormatStyle::LK_Java) {
437 GoogleStyle.AlignAfterOpenBracket = false;
438 GoogleStyle.AlignOperands = false;
439 GoogleStyle.AlignTrailingComments = false;
440 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
441 GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
442 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
443 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
444 GoogleStyle.ColumnLimit = 100;
445 GoogleStyle.SpaceAfterCStyleCast = true;
446 GoogleStyle.SpacesBeforeTrailingComments = 1;
447 } else if (Language == FormatStyle::LK_JavaScript) {
448 GoogleStyle.BreakBeforeTernaryOperators = false;
449 GoogleStyle.MaxEmptyLinesToKeep = 3;
450 GoogleStyle.SpacesInContainerLiterals = false;
451 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
452 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
453 } else if (Language == FormatStyle::LK_Proto) {
454 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
455 GoogleStyle.SpacesInContainerLiterals = false;
456 }
457
458 return GoogleStyle;
459 }
460
getChromiumStyle(FormatStyle::LanguageKind Language)461 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
462 FormatStyle ChromiumStyle = getGoogleStyle(Language);
463 if (Language == FormatStyle::LK_Java) {
464 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
465 ChromiumStyle.IndentWidth = 4;
466 ChromiumStyle.ContinuationIndentWidth = 8;
467 } else {
468 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
469 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
470 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
471 ChromiumStyle.AllowShortLoopsOnASingleLine = false;
472 ChromiumStyle.BinPackParameters = false;
473 ChromiumStyle.DerivePointerAlignment = false;
474 }
475 ChromiumStyle.MacroBlockBegin = "^IPC_BEGIN_MESSAGE_MAP$";
476 ChromiumStyle.MacroBlockBegin = "^IPC_END_MESSAGE_MAP$";
477 return ChromiumStyle;
478 }
479
getMozillaStyle()480 FormatStyle getMozillaStyle() {
481 FormatStyle MozillaStyle = getLLVMStyle();
482 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
483 MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
484 MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
485 FormatStyle::DRTBS_TopLevel;
486 MozillaStyle.AlwaysBreakTemplateDeclarations = true;
487 MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
488 MozillaStyle.BreakConstructorInitializersBeforeComma = true;
489 MozillaStyle.ConstructorInitializerIndentWidth = 2;
490 MozillaStyle.ContinuationIndentWidth = 2;
491 MozillaStyle.Cpp11BracedListStyle = false;
492 MozillaStyle.IndentCaseLabels = true;
493 MozillaStyle.ObjCSpaceAfterProperty = true;
494 MozillaStyle.ObjCSpaceBeforeProtocolList = false;
495 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
496 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
497 return MozillaStyle;
498 }
499
getWebKitStyle()500 FormatStyle getWebKitStyle() {
501 FormatStyle Style = getLLVMStyle();
502 Style.AccessModifierOffset = -4;
503 Style.AlignAfterOpenBracket = false;
504 Style.AlignOperands = false;
505 Style.AlignTrailingComments = false;
506 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
507 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
508 Style.BreakConstructorInitializersBeforeComma = true;
509 Style.Cpp11BracedListStyle = false;
510 Style.ColumnLimit = 0;
511 Style.IndentWidth = 4;
512 Style.NamespaceIndentation = FormatStyle::NI_Inner;
513 Style.ObjCBlockIndentWidth = 4;
514 Style.ObjCSpaceAfterProperty = true;
515 Style.PointerAlignment = FormatStyle::PAS_Left;
516 Style.Standard = FormatStyle::LS_Cpp03;
517 return Style;
518 }
519
getGNUStyle()520 FormatStyle getGNUStyle() {
521 FormatStyle Style = getLLVMStyle();
522 Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
523 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
524 Style.BreakBeforeBraces = FormatStyle::BS_GNU;
525 Style.BreakBeforeTernaryOperators = true;
526 Style.Cpp11BracedListStyle = false;
527 Style.ColumnLimit = 79;
528 Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
529 Style.Standard = FormatStyle::LS_Cpp03;
530 return Style;
531 }
532
getNoStyle()533 FormatStyle getNoStyle() {
534 FormatStyle NoStyle = getLLVMStyle();
535 NoStyle.DisableFormat = true;
536 return NoStyle;
537 }
538
getPredefinedStyle(StringRef Name,FormatStyle::LanguageKind Language,FormatStyle * Style)539 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
540 FormatStyle *Style) {
541 if (Name.equals_lower("llvm")) {
542 *Style = getLLVMStyle();
543 } else if (Name.equals_lower("chromium")) {
544 *Style = getChromiumStyle(Language);
545 } else if (Name.equals_lower("mozilla")) {
546 *Style = getMozillaStyle();
547 } else if (Name.equals_lower("google")) {
548 *Style = getGoogleStyle(Language);
549 } else if (Name.equals_lower("webkit")) {
550 *Style = getWebKitStyle();
551 } else if (Name.equals_lower("gnu")) {
552 *Style = getGNUStyle();
553 } else if (Name.equals_lower("none")) {
554 *Style = getNoStyle();
555 } else {
556 return false;
557 }
558
559 Style->Language = Language;
560 return true;
561 }
562
parseConfiguration(StringRef Text,FormatStyle * Style)563 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
564 assert(Style);
565 FormatStyle::LanguageKind Language = Style->Language;
566 assert(Language != FormatStyle::LK_None);
567 if (Text.trim().empty())
568 return make_error_code(ParseError::Error);
569
570 std::vector<FormatStyle> Styles;
571 llvm::yaml::Input Input(Text);
572 // DocumentListTraits<vector<FormatStyle>> uses the context to get default
573 // values for the fields, keys for which are missing from the configuration.
574 // Mapping also uses the context to get the language to find the correct
575 // base style.
576 Input.setContext(Style);
577 Input >> Styles;
578 if (Input.error())
579 return Input.error();
580
581 for (unsigned i = 0; i < Styles.size(); ++i) {
582 // Ensures that only the first configuration can skip the Language option.
583 if (Styles[i].Language == FormatStyle::LK_None && i != 0)
584 return make_error_code(ParseError::Error);
585 // Ensure that each language is configured at most once.
586 for (unsigned j = 0; j < i; ++j) {
587 if (Styles[i].Language == Styles[j].Language) {
588 DEBUG(llvm::dbgs()
589 << "Duplicate languages in the config file on positions " << j
590 << " and " << i << "\n");
591 return make_error_code(ParseError::Error);
592 }
593 }
594 }
595 // Look for a suitable configuration starting from the end, so we can
596 // find the configuration for the specific language first, and the default
597 // configuration (which can only be at slot 0) after it.
598 for (int i = Styles.size() - 1; i >= 0; --i) {
599 if (Styles[i].Language == Language ||
600 Styles[i].Language == FormatStyle::LK_None) {
601 *Style = Styles[i];
602 Style->Language = Language;
603 return make_error_code(ParseError::Success);
604 }
605 }
606 return make_error_code(ParseError::Unsuitable);
607 }
608
configurationAsText(const FormatStyle & Style)609 std::string configurationAsText(const FormatStyle &Style) {
610 std::string Text;
611 llvm::raw_string_ostream Stream(Text);
612 llvm::yaml::Output Output(Stream);
613 // We use the same mapping method for input and output, so we need a non-const
614 // reference here.
615 FormatStyle NonConstStyle = Style;
616 Output << NonConstStyle;
617 return Stream.str();
618 }
619
620 namespace {
621
622 class FormatTokenLexer {
623 public:
/// Sets up a lexer over the buffer of file \p ID that keeps whitespace, and
/// collects the identifiers of the style's for-each macros.
FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
                 encoding::Encoding Encoding)
    : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
      LessStashed(false), Column(0), TrailingWhitespace(0),
      SourceMgr(SourceMgr), ID(ID), Style(Style),
      IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
      Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
      MacroBlockBeginRegex(Style.MacroBlockBegin),
      MacroBlockEndRegex(Style.MacroBlockEnd) {
  Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
                      getFormattingLangOpts(Style)));
  // Whitespace is delivered as tokens instead of being skipped.
  Lex->SetKeepWhitespaceMode(true);

  // Store the identifier of every configured for-each macro, sorted.
  for (const std::string &MacroName : Style.ForEachMacros)
    ForEachMacros.push_back(&IdentTable.get(MacroName));
  std::sort(ForEachMacros.begin(), ForEachMacros.end());
}
641
lex()642 ArrayRef<FormatToken *> lex() {
643 assert(Tokens.empty());
644 assert(FirstInLineIndex == 0);
645 do {
646 Tokens.push_back(getNextToken());
647 tryMergePreviousTokens();
648 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
649 FirstInLineIndex = Tokens.size() - 1;
650 } while (Tokens.back()->Tok.isNot(tok::eof));
651 return Tokens;
652 }
653
getKeywords()654 const AdditionalKeywords &getKeywords() { return Keywords; }
655
656 private:
tryMergePreviousTokens()657 void tryMergePreviousTokens() {
658 if (tryMerge_TMacro())
659 return;
660 if (tryMergeConflictMarkers())
661 return;
662 if (tryMergeLessLess())
663 return;
664
665 if (Style.Language == FormatStyle::LK_JavaScript) {
666 if (tryMergeJSRegexLiteral())
667 return;
668 if (tryMergeEscapeSequence())
669 return;
670 if (tryMergeTemplateString())
671 return;
672
673 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
674 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
675 tok::equal};
676 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
677 tok::greaterequal};
678 static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
679 // FIXME: Investigate what token type gives the correct operator priority.
680 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
681 return;
682 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
683 return;
684 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
685 return;
686 if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
687 return;
688 }
689 }
690
tryMergeLessLess()691 bool tryMergeLessLess() {
692 // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
693 if (Tokens.size() < 3)
694 return false;
695
696 bool FourthTokenIsLess = false;
697 if (Tokens.size() > 3)
698 FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
699
700 auto First = Tokens.end() - 3;
701 if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
702 First[0]->isNot(tok::less) || FourthTokenIsLess)
703 return false;
704
705 // Only merge if there currently is no whitespace between the two "<".
706 if (First[1]->WhitespaceRange.getBegin() !=
707 First[1]->WhitespaceRange.getEnd())
708 return false;
709
710 First[0]->Tok.setKind(tok::lessless);
711 First[0]->TokenText = "<<";
712 First[0]->ColumnWidth += 1;
713 Tokens.erase(Tokens.end() - 2);
714 return true;
715 }
716
tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,TokenType NewType)717 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
718 if (Tokens.size() < Kinds.size())
719 return false;
720
721 SmallVectorImpl<FormatToken *>::const_iterator First =
722 Tokens.end() - Kinds.size();
723 if (!First[0]->is(Kinds[0]))
724 return false;
725 unsigned AddLength = 0;
726 for (unsigned i = 1; i < Kinds.size(); ++i) {
727 if (!First[i]->is(Kinds[i]) ||
728 First[i]->WhitespaceRange.getBegin() !=
729 First[i]->WhitespaceRange.getEnd())
730 return false;
731 AddLength += First[i]->TokenText.size();
732 }
733 Tokens.resize(Tokens.size() - Kinds.size() + 1);
734 First[0]->TokenText = StringRef(First[0]->TokenText.data(),
735 First[0]->TokenText.size() + AddLength);
736 First[0]->ColumnWidth += AddLength;
737 First[0]->Type = NewType;
738 return true;
739 }
740
741 // Tries to merge an escape sequence, i.e. a "\\" and the following
742 // character. Use e.g. inside JavaScript regex literals.
tryMergeEscapeSequence()743 bool tryMergeEscapeSequence() {
744 if (Tokens.size() < 2)
745 return false;
746 FormatToken *Previous = Tokens[Tokens.size() - 2];
747 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\")
748 return false;
749 ++Previous->ColumnWidth;
750 StringRef Text = Previous->TokenText;
751 Previous->TokenText = StringRef(Text.data(), Text.size() + 1);
752 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1);
753 Tokens.resize(Tokens.size() - 1);
754 Column = Previous->OriginalColumn + Previous->ColumnWidth;
755 return true;
756 }
757
758 // Try to determine whether the current token ends a JavaScript regex literal.
759 // We heuristically assume that this is a regex literal if we find two
760 // unescaped slashes on a line and the token before the first slash is one of
761 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
762 // a division.
tryMergeJSRegexLiteral()763 bool tryMergeJSRegexLiteral() {
764 if (Tokens.size() < 2)
765 return false;
766
767 // If this is a string literal with a slash inside, compute the slash's
768 // offset and try to find the beginning of the regex literal.
769 // Also look at tok::unknown, as it can be an unterminated char literal.
770 size_t SlashInStringPos = StringRef::npos;
771 if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant,
772 tok::unknown)) {
773 // Start search from position 1 as otherwise, this is an unknown token
774 // for an unterminated /*-comment which is handled elsewhere.
775 SlashInStringPos = Tokens.back()->TokenText.find('/', 1);
776 if (SlashInStringPos == StringRef::npos)
777 return false;
778 }
779
780 // If a regex literal ends in "\//", this gets represented by an unknown
781 // token "\" and a comment.
782 bool MightEndWithEscapedSlash =
783 Tokens.back()->is(tok::comment) &&
784 Tokens.back()->TokenText.startswith("//") &&
785 Tokens[Tokens.size() - 2]->TokenText == "\\";
786 if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos &&
787 (Tokens.back()->isNot(tok::slash) ||
788 (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
789 Tokens[Tokens.size() - 2]->TokenText == "\\")))
790 return false;
791
792 unsigned TokenCount = 0;
793 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
794 ++TokenCount;
795 auto Prev = I + 1;
796 while (Prev != E && Prev[0]->is(tok::comment))
797 ++Prev;
798 if (I[0]->isOneOf(tok::slash, tok::slashequal) &&
799 (Prev == E ||
800 ((Prev[0]->isOneOf(tok::l_paren, tok::semi, tok::l_brace,
801 tok::r_brace, tok::exclaim, tok::l_square,
802 tok::colon, tok::comma, tok::question,
803 tok::kw_return) ||
804 Prev[0]->isBinaryOperator())))) {
805 unsigned LastColumn = Tokens.back()->OriginalColumn;
806 SourceLocation Loc = Tokens.back()->Tok.getLocation();
807 if (MightEndWithEscapedSlash) {
808 // This regex literal ends in '\//'. Skip past the '//' of the last
809 // token and re-start lexing from there.
810 resetLexer(SourceMgr.getFileOffset(Loc) + 2);
811 } else if (SlashInStringPos != StringRef::npos) {
812 // This regex literal ends in a string_literal with a slash inside.
813 // Calculate end column and reset lexer appropriately.
814 resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1);
815 LastColumn += SlashInStringPos;
816 }
817 Tokens.resize(Tokens.size() - TokenCount);
818 Tokens.back()->Tok.setKind(tok::unknown);
819 Tokens.back()->Type = TT_RegexLiteral;
820 // Treat regex literals like other string_literals.
821 Tokens.back()->Tok.setKind(tok::string_literal);
822 Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
823 return true;
824 }
825
826 // There can't be a newline inside a regex literal.
827 if (I[0]->NewlinesBefore > 0)
828 return false;
829 }
830 return false;
831 }
832
tryMergeTemplateString()833 bool tryMergeTemplateString() {
834 if (Tokens.size() < 2)
835 return false;
836
837 FormatToken *EndBacktick = Tokens.back();
838 // Backticks get lexed as tok::unknown tokens. If a template string contains
839 // a comment start, it gets lexed as a tok::comment, or tok::unknown if
840 // unterminated.
841 if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
842 tok::char_constant, tok::unknown))
843 return false;
844 size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
845 // Unknown token that's not actually a backtick, or a comment that doesn't
846 // contain a backtick.
847 if (CommentBacktickPos == StringRef::npos)
848 return false;
849
850 unsigned TokenCount = 0;
851 bool IsMultiline = false;
852 unsigned EndColumnInFirstLine =
853 EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
854 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
855 ++TokenCount;
856 if (I[0]->IsMultiline)
857 IsMultiline = true;
858
859 // If there was a preceding template string, this must be the start of a
860 // template string, not the end.
861 if (I[0]->is(TT_TemplateString))
862 return false;
863
864 if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
865 // Keep track of the rhs offset of the last token to wrap across lines -
866 // its the rhs offset of the first line of the template string, used to
867 // determine its width.
868 if (I[0]->IsMultiline)
869 EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
870 // If the token has newlines, the token before it (if it exists) is the
871 // rhs end of the previous line.
872 if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
873 EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
874 IsMultiline = true;
875 }
876 continue;
877 }
878
879 Tokens.resize(Tokens.size() - TokenCount);
880 Tokens.back()->Type = TT_TemplateString;
881 const char *EndOffset =
882 EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
883 if (CommentBacktickPos != 0) {
884 // If the backtick was not the first character (e.g. in a comment),
885 // re-lex after the backtick position.
886 SourceLocation Loc = EndBacktick->Tok.getLocation();
887 resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
888 }
889 Tokens.back()->TokenText =
890 StringRef(Tokens.back()->TokenText.data(),
891 EndOffset - Tokens.back()->TokenText.data());
892
893 unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
894 if (EndOriginalColumn == 0) {
895 SourceLocation Loc = EndBacktick->Tok.getLocation();
896 EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
897 }
898 // If the ` is further down within the token (e.g. in a comment).
899 EndOriginalColumn += CommentBacktickPos;
900
901 if (IsMultiline) {
902 // ColumnWidth is from backtick to last token in line.
903 // LastLineColumnWidth is 0 to backtick.
904 // x = `some content
905 // until here`;
906 Tokens.back()->ColumnWidth =
907 EndColumnInFirstLine - Tokens.back()->OriginalColumn;
908 // +1 for the ` itself.
909 Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
910 Tokens.back()->IsMultiline = true;
911 } else {
912 // Token simply spans from start to end, +1 for the ` itself.
913 Tokens.back()->ColumnWidth =
914 EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
915 }
916 return true;
917 }
918 return false;
919 }
920
tryMerge_TMacro()921 bool tryMerge_TMacro() {
922 if (Tokens.size() < 4)
923 return false;
924 FormatToken *Last = Tokens.back();
925 if (!Last->is(tok::r_paren))
926 return false;
927
928 FormatToken *String = Tokens[Tokens.size() - 2];
929 if (!String->is(tok::string_literal) || String->IsMultiline)
930 return false;
931
932 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
933 return false;
934
935 FormatToken *Macro = Tokens[Tokens.size() - 4];
936 if (Macro->TokenText != "_T")
937 return false;
938
939 const char *Start = Macro->TokenText.data();
940 const char *End = Last->TokenText.data() + Last->TokenText.size();
941 String->TokenText = StringRef(Start, End - Start);
942 String->IsFirst = Macro->IsFirst;
943 String->LastNewlineOffset = Macro->LastNewlineOffset;
944 String->WhitespaceRange = Macro->WhitespaceRange;
945 String->OriginalColumn = Macro->OriginalColumn;
946 String->ColumnWidth = encoding::columnWidthWithTabs(
947 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
948 String->NewlinesBefore = Macro->NewlinesBefore;
949 String->HasUnescapedNewline = Macro->HasUnescapedNewline;
950
951 Tokens.pop_back();
952 Tokens.pop_back();
953 Tokens.pop_back();
954 Tokens.back() = String;
955 return true;
956 }
957
tryMergeConflictMarkers()958 bool tryMergeConflictMarkers() {
959 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
960 return false;
961
962 // Conflict lines look like:
963 // <marker> <text from the vcs>
964 // For example:
965 // >>>>>>> /file/in/file/system at revision 1234
966 //
967 // We merge all tokens in a line that starts with a conflict marker
968 // into a single token with a special token type that the unwrapped line
969 // parser will use to correctly rebuild the underlying code.
970
971 FileID ID;
972 // Get the position of the first token in the line.
973 unsigned FirstInLineOffset;
974 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
975 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
976 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
977 // Calculate the offset of the start of the current line.
978 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
979 if (LineOffset == StringRef::npos) {
980 LineOffset = 0;
981 } else {
982 ++LineOffset;
983 }
984
985 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
986 StringRef LineStart;
987 if (FirstSpace == StringRef::npos) {
988 LineStart = Buffer.substr(LineOffset);
989 } else {
990 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
991 }
992
993 TokenType Type = TT_Unknown;
994 if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
995 Type = TT_ConflictStart;
996 } else if (LineStart == "|||||||" || LineStart == "=======" ||
997 LineStart == "====") {
998 Type = TT_ConflictAlternative;
999 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
1000 Type = TT_ConflictEnd;
1001 }
1002
1003 if (Type != TT_Unknown) {
1004 FormatToken *Next = Tokens.back();
1005
1006 Tokens.resize(FirstInLineIndex + 1);
1007 // We do not need to build a complete token here, as we will skip it
1008 // during parsing anyway (as we must not touch whitespace around conflict
1009 // markers).
1010 Tokens.back()->Type = Type;
1011 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1012
1013 Tokens.push_back(Next);
1014 return true;
1015 }
1016
1017 return false;
1018 }
1019
getStashedToken()1020 FormatToken *getStashedToken() {
1021 // Create a synthesized second '>' or '<' token.
1022 Token Tok = FormatTok->Tok;
1023 StringRef TokenText = FormatTok->TokenText;
1024
1025 unsigned OriginalColumn = FormatTok->OriginalColumn;
1026 FormatTok = new (Allocator.Allocate()) FormatToken;
1027 FormatTok->Tok = Tok;
1028 SourceLocation TokLocation =
1029 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
1030 FormatTok->Tok.setLocation(TokLocation);
1031 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1032 FormatTok->TokenText = TokenText;
1033 FormatTok->ColumnWidth = 1;
1034 FormatTok->OriginalColumn = OriginalColumn + 1;
1035
1036 return FormatTok;
1037 }
1038
getNextToken()1039 FormatToken *getNextToken() {
1040 if (GreaterStashed) {
1041 GreaterStashed = false;
1042 return getStashedToken();
1043 }
1044 if (LessStashed) {
1045 LessStashed = false;
1046 return getStashedToken();
1047 }
1048
1049 FormatTok = new (Allocator.Allocate()) FormatToken;
1050 readRawToken(*FormatTok);
1051 SourceLocation WhitespaceStart =
1052 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1053 FormatTok->IsFirst = IsFirstToken;
1054 IsFirstToken = false;
1055
1056 // Consume and record whitespace until we find a significant token.
1057 unsigned WhitespaceLength = TrailingWhitespace;
1058 while (FormatTok->Tok.is(tok::unknown)) {
1059 StringRef Text = FormatTok->TokenText;
1060 auto EscapesNewline = [&](int pos) {
1061 // A '\r' here is just part of '\r\n'. Skip it.
1062 if (pos >= 0 && Text[pos] == '\r')
1063 --pos;
1064 // See whether there is an odd number of '\' before this.
1065 unsigned count = 0;
1066 for (; pos >= 0; --pos, ++count)
1067 if (Text[pos] != '\\')
1068 break;
1069 return count & 1;
1070 };
1071 // FIXME: This miscounts tok:unknown tokens that are not just
1072 // whitespace, e.g. a '`' character.
1073 for (int i = 0, e = Text.size(); i != e; ++i) {
1074 switch (Text[i]) {
1075 case '\n':
1076 ++FormatTok->NewlinesBefore;
1077 FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
1078 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1079 Column = 0;
1080 break;
1081 case '\r':
1082 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1083 Column = 0;
1084 break;
1085 case '\f':
1086 case '\v':
1087 Column = 0;
1088 break;
1089 case ' ':
1090 ++Column;
1091 break;
1092 case '\t':
1093 Column += Style.TabWidth - Column % Style.TabWidth;
1094 break;
1095 case '\\':
1096 if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
1097 FormatTok->Type = TT_ImplicitStringLiteral;
1098 break;
1099 default:
1100 FormatTok->Type = TT_ImplicitStringLiteral;
1101 break;
1102 }
1103 }
1104
1105 if (FormatTok->is(TT_ImplicitStringLiteral))
1106 break;
1107 WhitespaceLength += FormatTok->Tok.getLength();
1108
1109 readRawToken(*FormatTok);
1110 }
1111
1112 // In case the token starts with escaped newlines, we want to
1113 // take them into account as whitespace - this pattern is quite frequent
1114 // in macro definitions.
1115 // FIXME: Add a more explicit test.
1116 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1117 FormatTok->TokenText[1] == '\n') {
1118 ++FormatTok->NewlinesBefore;
1119 WhitespaceLength += 2;
1120 FormatTok->LastNewlineOffset = 2;
1121 Column = 0;
1122 FormatTok->TokenText = FormatTok->TokenText.substr(2);
1123 }
1124
1125 FormatTok->WhitespaceRange = SourceRange(
1126 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1127
1128 FormatTok->OriginalColumn = Column;
1129
1130 TrailingWhitespace = 0;
1131 if (FormatTok->Tok.is(tok::comment)) {
1132 // FIXME: Add the trimmed whitespace to Column.
1133 StringRef UntrimmedText = FormatTok->TokenText;
1134 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1135 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1136 } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1137 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1138 FormatTok->Tok.setIdentifierInfo(&Info);
1139 FormatTok->Tok.setKind(Info.getTokenID());
1140 if (Style.Language == FormatStyle::LK_Java &&
1141 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) {
1142 FormatTok->Tok.setKind(tok::identifier);
1143 FormatTok->Tok.setIdentifierInfo(nullptr);
1144 }
1145 } else if (FormatTok->Tok.is(tok::greatergreater)) {
1146 FormatTok->Tok.setKind(tok::greater);
1147 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1148 GreaterStashed = true;
1149 } else if (FormatTok->Tok.is(tok::lessless)) {
1150 FormatTok->Tok.setKind(tok::less);
1151 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1152 LessStashed = true;
1153 }
1154
1155 // Now FormatTok is the next non-whitespace token.
1156
1157 StringRef Text = FormatTok->TokenText;
1158 size_t FirstNewlinePos = Text.find('\n');
1159 if (FirstNewlinePos == StringRef::npos) {
1160 // FIXME: ColumnWidth actually depends on the start column, we need to
1161 // take this into account when the token is moved.
1162 FormatTok->ColumnWidth =
1163 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1164 Column += FormatTok->ColumnWidth;
1165 } else {
1166 FormatTok->IsMultiline = true;
1167 // FIXME: ColumnWidth actually depends on the start column, we need to
1168 // take this into account when the token is moved.
1169 FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1170 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1171
1172 // The last line of the token always starts in column 0.
1173 // Thus, the length can be precomputed even in the presence of tabs.
1174 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1175 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1176 Encoding);
1177 Column = FormatTok->LastLineColumnWidth;
1178 }
1179
1180 if (Style.Language == FormatStyle::LK_Cpp) {
1181 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1182 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1183 tok::pp_define) &&
1184 std::find(ForEachMacros.begin(), ForEachMacros.end(),
1185 FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
1186 FormatTok->Type = TT_ForEachMacro;
1187 } else if (FormatTok->is(tok::identifier)) {
1188 if (MacroBlockBeginRegex.match(Text)) {
1189 FormatTok->Type = TT_MacroBlockBegin;
1190 } else if (MacroBlockEndRegex.match(Text)) {
1191 FormatTok->Type = TT_MacroBlockEnd;
1192 }
1193 }
1194 }
1195
1196 return FormatTok;
1197 }
1198
// The token most recently created by getNextToken() / getStashedToken().
FormatToken *FormatTok;
// True until the first token has been read; copied into FormatToken::IsFirst.
bool IsFirstToken;
// Set when a '>>' / '<<' was split and its second half is still to be
// returned by the next getNextToken() call.
bool GreaterStashed, LessStashed;
// Running column position; reset to 0 at newlines and advanced while
// whitespace and tokens are consumed.
unsigned Column;
// Number of whitespace characters trimmed from the end of the previous
// comment token; counted as leading whitespace of the following token.
unsigned TrailingWhitespace;
// Raw lexer that readRawToken() pulls from; recreated by resetLexer().
std::unique_ptr<Lexer> Lex;
SourceManager &SourceMgr;
// The file whose buffer is lexed (see resetLexer()).
FileID ID;
FormatStyle &Style;
// Used to turn raw identifiers into keyword / identifier tokens.
IdentifierTable IdentTable;
// NOTE(review): presumably the language-specific extra keywords; not used in
// the visible part of this class -- confirm against the rest of the file.
AdditionalKeywords Keywords;
// Text encoding passed on to the column width computations.
encoding::Encoding Encoding;
// Allocates every FormatToken created by this lexer.
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
// Index (in 'Tokens') of the last token that starts a new line.
unsigned FirstInLineIndex;
// Tokens lexed so far; the tryMerge*() functions operate on its tail.
SmallVector<FormatToken *, 16> Tokens;
// Identifiers that getNextToken() tags as TT_ForEachMacro.
SmallVector<IdentifierInfo *, 8> ForEachMacros;

// Toggled by "clang-format off" / "clang-format on" comments in
// readRawToken(); copied into FormatToken::Finalized.
bool FormattingDisabled;

// Identifiers matching these are tagged TT_MacroBlockBegin /
// TT_MacroBlockEnd in getNextToken().
llvm::Regex MacroBlockBeginRegex;
llvm::Regex MacroBlockEndRegex;
1221
readRawToken(FormatToken & Tok)1222 void readRawToken(FormatToken &Tok) {
1223 Lex->LexFromRawLexer(Tok.Tok);
1224 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1225 Tok.Tok.getLength());
1226 // For formatting, treat unterminated string literals like normal string
1227 // literals.
1228 if (Tok.is(tok::unknown)) {
1229 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1230 Tok.Tok.setKind(tok::string_literal);
1231 Tok.IsUnterminatedLiteral = true;
1232 } else if (Style.Language == FormatStyle::LK_JavaScript &&
1233 Tok.TokenText == "''") {
1234 Tok.Tok.setKind(tok::char_constant);
1235 }
1236 }
1237
1238 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1239 Tok.TokenText == "/* clang-format on */")) {
1240 FormattingDisabled = false;
1241 }
1242
1243 Tok.Finalized = FormattingDisabled;
1244
1245 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1246 Tok.TokenText == "/* clang-format off */")) {
1247 FormattingDisabled = true;
1248 }
1249 }
1250
resetLexer(unsigned Offset)1251 void resetLexer(unsigned Offset) {
1252 StringRef Buffer = SourceMgr.getBufferData(ID);
1253 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1254 getFormattingLangOpts(Style), Buffer.begin(),
1255 Buffer.begin() + Offset, Buffer.end()));
1256 Lex->SetKeepWhitespaceMode(true);
1257 TrailingWhitespace = 0;
1258 }
1259 };
1260
getLanguageName(FormatStyle::LanguageKind Language)1261 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1262 switch (Language) {
1263 case FormatStyle::LK_Cpp:
1264 return "C++";
1265 case FormatStyle::LK_Java:
1266 return "Java";
1267 case FormatStyle::LK_JavaScript:
1268 return "JavaScript";
1269 case FormatStyle::LK_Proto:
1270 return "Proto";
1271 default:
1272 return "Unknown";
1273 }
1274 }
1275
1276 class Formatter : public UnwrappedLineConsumer {
1277 public:
Formatter(const FormatStyle & Style,SourceManager & SourceMgr,FileID ID,ArrayRef<CharSourceRange> Ranges)1278 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
1279 ArrayRef<CharSourceRange> Ranges)
1280 : Style(Style), ID(ID), SourceMgr(SourceMgr),
1281 Whitespaces(SourceMgr, Style,
1282 inputUsesCRLF(SourceMgr.getBufferData(ID))),
1283 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1284 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
1285 DEBUG(llvm::dbgs() << "File encoding: "
1286 << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1287 : "unknown")
1288 << "\n");
1289 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1290 << "\n");
1291 }
1292
format(bool * IncompleteFormat)1293 tooling::Replacements format(bool *IncompleteFormat) {
1294 tooling::Replacements Result;
1295 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
1296
1297 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1298 *this);
1299 Parser.parse();
1300 assert(UnwrappedLines.rbegin()->empty());
1301 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1302 ++Run) {
1303 DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1304 SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1305 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1306 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1307 }
1308 tooling::Replacements RunResult =
1309 format(AnnotatedLines, Tokens, IncompleteFormat);
1310 DEBUG({
1311 llvm::dbgs() << "Replacements for run " << Run << ":\n";
1312 for (tooling::Replacements::iterator I = RunResult.begin(),
1313 E = RunResult.end();
1314 I != E; ++I) {
1315 llvm::dbgs() << I->toString() << "\n";
1316 }
1317 });
1318 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1319 delete AnnotatedLines[i];
1320 }
1321 Result.insert(RunResult.begin(), RunResult.end());
1322 Whitespaces.reset();
1323 }
1324 return Result;
1325 }
1326
format(SmallVectorImpl<AnnotatedLine * > & AnnotatedLines,FormatTokenLexer & Tokens,bool * IncompleteFormat)1327 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1328 FormatTokenLexer &Tokens,
1329 bool *IncompleteFormat) {
1330 TokenAnnotator Annotator(Style, Tokens.getKeywords());
1331 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1332 Annotator.annotate(*AnnotatedLines[i]);
1333 }
1334 deriveLocalStyle(AnnotatedLines);
1335 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1336 Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1337 }
1338 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1339
1340 Annotator.setCommentLineLevels(AnnotatedLines);
1341 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
1342 Whitespaces, Encoding,
1343 BinPackInconclusiveFunctions);
1344 UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
1345 IncompleteFormat)
1346 .format(AnnotatedLines);
1347 return Whitespaces.generateReplacements();
1348 }
1349
1350 private:
1351 // Determines which lines are affected by the SourceRanges given as input.
1352 // Returns \c true if at least one line between I and E or one of their
1353 // children is affected.
computeAffectedLines(SmallVectorImpl<AnnotatedLine * >::iterator I,SmallVectorImpl<AnnotatedLine * >::iterator E)1354 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1355 SmallVectorImpl<AnnotatedLine *>::iterator E) {
1356 bool SomeLineAffected = false;
1357 const AnnotatedLine *PreviousLine = nullptr;
1358 while (I != E) {
1359 AnnotatedLine *Line = *I;
1360 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1361
1362 // If a line is part of a preprocessor directive, it needs to be formatted
1363 // if any token within the directive is affected.
1364 if (Line->InPPDirective) {
1365 FormatToken *Last = Line->Last;
1366 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1367 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1368 Last = (*PPEnd)->Last;
1369 ++PPEnd;
1370 }
1371
1372 if (affectsTokenRange(*Line->First, *Last,
1373 /*IncludeLeadingNewlines=*/false)) {
1374 SomeLineAffected = true;
1375 markAllAsAffected(I, PPEnd);
1376 }
1377 I = PPEnd;
1378 continue;
1379 }
1380
1381 if (nonPPLineAffected(Line, PreviousLine))
1382 SomeLineAffected = true;
1383
1384 PreviousLine = Line;
1385 ++I;
1386 }
1387 return SomeLineAffected;
1388 }
1389
1390 // Determines whether 'Line' is affected by the SourceRanges given as input.
1391 // Returns \c true if line or one if its children is affected.
nonPPLineAffected(AnnotatedLine * Line,const AnnotatedLine * PreviousLine)1392 bool nonPPLineAffected(AnnotatedLine *Line,
1393 const AnnotatedLine *PreviousLine) {
1394 bool SomeLineAffected = false;
1395 Line->ChildrenAffected =
1396 computeAffectedLines(Line->Children.begin(), Line->Children.end());
1397 if (Line->ChildrenAffected)
1398 SomeLineAffected = true;
1399
1400 // Stores whether one of the line's tokens is directly affected.
1401 bool SomeTokenAffected = false;
1402 // Stores whether we need to look at the leading newlines of the next token
1403 // in order to determine whether it was affected.
1404 bool IncludeLeadingNewlines = false;
1405
1406 // Stores whether the first child line of any of this line's tokens is
1407 // affected.
1408 bool SomeFirstChildAffected = false;
1409
1410 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1411 // Determine whether 'Tok' was affected.
1412 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1413 SomeTokenAffected = true;
1414
1415 // Determine whether the first child of 'Tok' was affected.
1416 if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1417 SomeFirstChildAffected = true;
1418
1419 IncludeLeadingNewlines = Tok->Children.empty();
1420 }
1421
1422 // Was this line moved, i.e. has it previously been on the same line as an
1423 // affected line?
1424 bool LineMoved = PreviousLine && PreviousLine->Affected &&
1425 Line->First->NewlinesBefore == 0;
1426
1427 bool IsContinuedComment =
1428 Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1429 Line->First->NewlinesBefore < 2 && PreviousLine &&
1430 PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1431
1432 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1433 IsContinuedComment) {
1434 Line->Affected = true;
1435 SomeLineAffected = true;
1436 }
1437 return SomeLineAffected;
1438 }
1439
1440 // Marks all lines between I and E as well as all their children as affected.
markAllAsAffected(SmallVectorImpl<AnnotatedLine * >::iterator I,SmallVectorImpl<AnnotatedLine * >::iterator E)1441 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1442 SmallVectorImpl<AnnotatedLine *>::iterator E) {
1443 while (I != E) {
1444 (*I)->Affected = true;
1445 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1446 ++I;
1447 }
1448 }
1449
1450 // Returns true if the range from 'First' to 'Last' intersects with one of the
1451 // input ranges.
affectsTokenRange(const FormatToken & First,const FormatToken & Last,bool IncludeLeadingNewlines)1452 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1453 bool IncludeLeadingNewlines) {
1454 SourceLocation Start = First.WhitespaceRange.getBegin();
1455 if (!IncludeLeadingNewlines)
1456 Start = Start.getLocWithOffset(First.LastNewlineOffset);
1457 SourceLocation End = Last.getStartOfNonWhitespace();
1458 End = End.getLocWithOffset(Last.TokenText.size());
1459 CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1460 return affectsCharSourceRange(Range);
1461 }
1462
1463 // Returns true if one of the input ranges intersect the leading empty lines
1464 // before 'Tok'.
affectsLeadingEmptyLines(const FormatToken & Tok)1465 bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1466 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1467 Tok.WhitespaceRange.getBegin(),
1468 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1469 return affectsCharSourceRange(EmptyLineRange);
1470 }
1471
1472 // Returns true if 'Range' intersects with one of the input ranges.
affectsCharSourceRange(const CharSourceRange & Range)1473 bool affectsCharSourceRange(const CharSourceRange &Range) {
1474 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1475 E = Ranges.end();
1476 I != E; ++I) {
1477 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1478 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1479 return true;
1480 }
1481 return false;
1482 }
1483
inputUsesCRLF(StringRef Text)1484 static bool inputUsesCRLF(StringRef Text) {
1485 return Text.count('\r') * 2 > Text.count('\n');
1486 }
1487
1488 void
deriveLocalStyle(const SmallVectorImpl<AnnotatedLine * > & AnnotatedLines)1489 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1490 unsigned CountBoundToVariable = 0;
1491 unsigned CountBoundToType = 0;
1492 bool HasCpp03IncompatibleFormat = false;
1493 bool HasBinPackedFunction = false;
1494 bool HasOnePerLineFunction = false;
1495 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1496 if (!AnnotatedLines[i]->First->Next)
1497 continue;
1498 FormatToken *Tok = AnnotatedLines[i]->First->Next;
1499 while (Tok->Next) {
1500 if (Tok->is(TT_PointerOrReference)) {
1501 bool SpacesBefore =
1502 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1503 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1504 Tok->Next->WhitespaceRange.getEnd();
1505 if (SpacesBefore && !SpacesAfter)
1506 ++CountBoundToVariable;
1507 else if (!SpacesBefore && SpacesAfter)
1508 ++CountBoundToType;
1509 }
1510
1511 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1512 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1513 HasCpp03IncompatibleFormat = true;
1514 if (Tok->is(TT_TemplateCloser) &&
1515 Tok->Previous->is(TT_TemplateCloser))
1516 HasCpp03IncompatibleFormat = true;
1517 }
1518
1519 if (Tok->PackingKind == PPK_BinPacked)
1520 HasBinPackedFunction = true;
1521 if (Tok->PackingKind == PPK_OnePerLine)
1522 HasOnePerLineFunction = true;
1523
1524 Tok = Tok->Next;
1525 }
1526 }
1527 if (Style.DerivePointerAlignment) {
1528 if (CountBoundToType > CountBoundToVariable)
1529 Style.PointerAlignment = FormatStyle::PAS_Left;
1530 else if (CountBoundToType < CountBoundToVariable)
1531 Style.PointerAlignment = FormatStyle::PAS_Right;
1532 }
1533 if (Style.Standard == FormatStyle::LS_Auto) {
1534 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1535 : FormatStyle::LS_Cpp03;
1536 }
1537 BinPackInconclusiveFunctions =
1538 HasBinPackedFunction || !HasOnePerLineFunction;
1539 }
1540
consumeUnwrappedLine(const UnwrappedLine & TheLine)1541 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1542 assert(!UnwrappedLines.empty());
1543 UnwrappedLines.back().push_back(TheLine);
1544 }
1545
finishRun()1546 void finishRun() override {
1547 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1548 }
1549
1550 FormatStyle Style;
1551 FileID ID;
1552 SourceManager &SourceMgr;
1553 WhitespaceManager Whitespaces;
1554 SmallVector<CharSourceRange, 8> Ranges;
1555 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1556
1557 encoding::Encoding Encoding;
1558 bool BinPackInconclusiveFunctions;
1559 };
1560
1561 } // end anonymous namespace
1562
reformat(const FormatStyle & Style,SourceManager & SourceMgr,FileID ID,ArrayRef<CharSourceRange> Ranges,bool * IncompleteFormat)1563 tooling::Replacements reformat(const FormatStyle &Style,
1564 SourceManager &SourceMgr, FileID ID,
1565 ArrayRef<CharSourceRange> Ranges,
1566 bool *IncompleteFormat) {
1567 if (Style.DisableFormat)
1568 return tooling::Replacements();
1569 Formatter formatter(Style, SourceMgr, ID, Ranges);
1570 return formatter.format(IncompleteFormat);
1571 }
1572
reformat(const FormatStyle & Style,StringRef Code,ArrayRef<tooling::Range> Ranges,StringRef FileName,bool * IncompleteFormat)1573 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1574 ArrayRef<tooling::Range> Ranges,
1575 StringRef FileName, bool *IncompleteFormat) {
1576 if (Style.DisableFormat)
1577 return tooling::Replacements();
1578
1579 FileManager Files((FileSystemOptions()));
1580 DiagnosticsEngine Diagnostics(
1581 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1582 new DiagnosticOptions);
1583 SourceManager SourceMgr(Diagnostics, Files);
1584 std::unique_ptr<llvm::MemoryBuffer> Buf =
1585 llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1586 const clang::FileEntry *Entry =
1587 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1588 SourceMgr.overrideFileContents(Entry, std::move(Buf));
1589 FileID ID =
1590 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1591 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1592 std::vector<CharSourceRange> CharRanges;
1593 for (const tooling::Range &Range : Ranges) {
1594 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
1595 SourceLocation End = Start.getLocWithOffset(Range.getLength());
1596 CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1597 }
1598 return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
1599 }
1600
getFormattingLangOpts(const FormatStyle & Style)1601 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
1602 LangOptions LangOpts;
1603 LangOpts.CPlusPlus = 1;
1604 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1605 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1606 LangOpts.LineComment = 1;
1607 bool AlternativeOperators = Style.Language == FormatStyle::LK_Cpp;
1608 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
1609 LangOpts.Bool = 1;
1610 LangOpts.ObjC1 = 1;
1611 LangOpts.ObjC2 = 1;
1612 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
1613 return LangOpts;
1614 }
1615
/// \brief Help text describing the values accepted by the -style option:
/// a predefined style name, "file", or an inline "{key: value, ...}"
/// configuration.
const char *StyleOptionHelpDescription =
    "Coding style, currently supports:\n"
    " LLVM, Google, Chromium, Mozilla, WebKit.\n"
    "Use -style=file to load style configuration from\n"
    ".clang-format file located in one of the parent\n"
    "directories of the source file (or current\n"
    "directory for stdin).\n"
    "Use -style=\"{key: value, ...}\" to set specific\n"
    "parameters, e.g.:\n"
    " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1626
getLanguageByFileName(StringRef FileName)1627 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1628 if (FileName.endswith(".java")) {
1629 return FormatStyle::LK_Java;
1630 } else if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) {
1631 // JavaScript or TypeScript.
1632 return FormatStyle::LK_JavaScript;
1633 } else if (FileName.endswith_lower(".proto") ||
1634 FileName.endswith_lower(".protodevel")) {
1635 return FormatStyle::LK_Proto;
1636 }
1637 return FormatStyle::LK_Cpp;
1638 }
1639
getStyle(StringRef StyleName,StringRef FileName,StringRef FallbackStyle)1640 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1641 StringRef FallbackStyle) {
1642 FormatStyle Style = getLLVMStyle();
1643 Style.Language = getLanguageByFileName(FileName);
1644 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1645 llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1646 << "\" using LLVM style\n";
1647 return Style;
1648 }
1649
1650 if (StyleName.startswith("{")) {
1651 // Parse YAML/JSON style from the command line.
1652 if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
1653 llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1654 << FallbackStyle << " style\n";
1655 }
1656 return Style;
1657 }
1658
1659 if (!StyleName.equals_lower("file")) {
1660 if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1661 llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1662 << " style\n";
1663 return Style;
1664 }
1665
1666 // Look for .clang-format/_clang-format file in the file's parent directories.
1667 SmallString<128> UnsuitableConfigFiles;
1668 SmallString<128> Path(FileName);
1669 llvm::sys::fs::make_absolute(Path);
1670 for (StringRef Directory = Path; !Directory.empty();
1671 Directory = llvm::sys::path::parent_path(Directory)) {
1672 if (!llvm::sys::fs::is_directory(Directory))
1673 continue;
1674 SmallString<128> ConfigFile(Directory);
1675
1676 llvm::sys::path::append(ConfigFile, ".clang-format");
1677 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1678 bool IsFile = false;
1679 // Ignore errors from is_regular_file: we only need to know if we can read
1680 // the file or not.
1681 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1682
1683 if (!IsFile) {
1684 // Try _clang-format too, since dotfiles are not commonly used on Windows.
1685 ConfigFile = Directory;
1686 llvm::sys::path::append(ConfigFile, "_clang-format");
1687 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1688 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1689 }
1690
1691 if (IsFile) {
1692 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1693 llvm::MemoryBuffer::getFile(ConfigFile.c_str());
1694 if (std::error_code EC = Text.getError()) {
1695 llvm::errs() << EC.message() << "\n";
1696 break;
1697 }
1698 if (std::error_code ec =
1699 parseConfiguration(Text.get()->getBuffer(), &Style)) {
1700 if (ec == ParseError::Unsuitable) {
1701 if (!UnsuitableConfigFiles.empty())
1702 UnsuitableConfigFiles.append(", ");
1703 UnsuitableConfigFiles.append(ConfigFile);
1704 continue;
1705 }
1706 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1707 << "\n";
1708 break;
1709 }
1710 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1711 return Style;
1712 }
1713 }
1714 if (!UnsuitableConfigFiles.empty()) {
1715 llvm::errs() << "Configuration file(s) do(es) not support "
1716 << getLanguageName(Style.Language) << ": "
1717 << UnsuitableConfigFiles << "\n";
1718 }
1719 return Style;
1720 }
1721
1722 } // namespace format
1723 } // namespace clang
1724