aboutsummaryrefslogtreecommitdiff
blob: c8865d128fb8423f55c0d8b97403bef47347ada4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
//===- Symbols.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_COFF_SYMBOLS_H
#define LLD_COFF_SYMBOLS_H

#include "Chunks.h"
#include "Config.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include <atomic>
#include <memory>
#include <vector>

namespace lld {

// Converts a COFF symbol to a string. Only the declaration lives in this
// header.
std::string toString(coff::Symbol &b);

// There are two different ways to convert an Archive::Symbol to a string:
// One for Microsoft name mangling and one for Itanium name mangling.
// Call the functions toCOFFString and toELFString, not just toString.
std::string toCOFFString(const coff::Archive::Symbol &b);

namespace coff {

using llvm::object::Archive;
using llvm::object::COFFSymbolRef;
using llvm::object::coff_import_header;
using llvm::object::coff_symbol_generic;

class ArchiveFile;
class InputFile;
class ObjFile;
class SymbolTable;

// The base class for real symbol classes.
//
// Concrete symbol classes derive from this class and are told apart by the
// Kind tag stored in symbolKind; the static classof() members of the derived
// classes test that tag.
class Symbol {
public:
  enum Kind {
    // The order of these is significant. We start with the regular defined
    // symbols as those are the most prevalent and the zero tag is the cheapest
    // to set. Among the defined kinds, the lower the kind is preferred over
    // the higher kind when testing whether one symbol should take precedence
    // over another.
    DefinedRegularKind = 0,
    DefinedCommonKind,
    DefinedLocalImportKind,
    DefinedImportThunkKind,
    DefinedImportDataKind,
    DefinedAbsoluteKind,
    DefinedSyntheticKind,

    // Kinds that are not covered by Defined::classof().
    UndefinedKind,
    LazyArchiveKind,
    LazyObjectKind,
    LazyDLLSymbolKind,

    // Range markers: DefinedCOFF::classof() accepts every kind up to
    // LastDefinedCOFFKind, Defined::classof() every kind up to
    // LastDefinedKind.
    LastDefinedCOFFKind = DefinedCommonKind,
    LastDefinedKind = DefinedSyntheticKind,
  };

  // Returns the kind tag this symbol was constructed with.
  Kind kind() const { return static_cast<Kind>(symbolKind); }

  // Returns the symbol name.
  StringRef getName() {
    // COFF symbol names are read lazily for a performance reason.
    // Non-external symbol names are never used by the linker except for logging
    // or debugging. Their internal references are resolved not by name but by
    // symbol index. And because they are not external, no one can refer them by
    // name. Object files contain lots of non-external symbols, and creating
    // StringRefs for them (which involves lots of strlen() on the string table)
    // is a waste of time.
    if (nameData == nullptr)
      computeName();
    return StringRef(nameData, nameSize);
  }

  // Declared here, defined elsewhere.
  // NOTE(review): presumably overwrites this symbol with `other` (`size`
  // bytes) while keeping the current name -- confirm against the definition.
  void replaceKeepingName(Symbol *other, size_t size);

  // Returns the file from which this symbol was created.
  InputFile *getFile();

  // Indicates that this symbol will be included in the final image. Only valid
  // after calling markLive.
  bool isLive() const;

  // True for the three lazy kinds (archive, object and DLL symbol).
  bool isLazy() const {
    return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind ||
           symbolKind == LazyDLLSymbolKind;
  }

private:
  // Called by getName() when nameData is still null; expected to fill in
  // nameData and nameSize. Defined elsewhere.
  void computeName();

protected:
  // SymbolTable is granted access to the protected and private members.
  friend SymbolTable;

  // Initializes the kind tag, the name and the flag bits. An empty name is
  // stored as a null nameData so that getName() computes it on first use.
  // NOTE(review): isUsedInRegularObj is absent from this initializer list, so
  // this constructor never assigns that bit; whoever creates or replaces a
  // symbol has to take care of it.
  explicit Symbol(Kind k, StringRef n = "")
      : symbolKind(k), isExternal(true), isCOMDAT(false),
        writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false),
        isRuntimePseudoReloc(false), deferUndefined(false), canInline(true),
        nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {}

  // Kind tag, fixed at construction; read back through kind().
  const unsigned symbolKind : 8;

  // Defaults to true; several derived-class constructors overwrite it.
  unsigned isExternal : 1;

public:
  // This bit is used by the \c DefinedRegular subclass.
  unsigned isCOMDAT : 1;

  // This bit is used by Writer::createSymbolAndStringTable() to prevent
  // symbols from being written to the symbol table more than once.
  unsigned writtenToSymtab : 1;

  // True if this symbol was referenced by a regular (non-bitcode) object.
  unsigned isUsedInRegularObj : 1;

  // True if we've seen both a lazy and an undefined symbol with this symbol
  // name, which means that we have enqueued an archive member load and should
  // not load any more archive members to resolve the same symbol.
  unsigned pendingArchiveLoad : 1;

  /// True if we've already added this symbol to the list of GC roots.
  unsigned isGCRoot : 1;

  // NOTE(review): undocumented in this header; the name suggests it marks
  // symbols involved in runtime pseudo relocations -- confirm at the users.
  unsigned isRuntimePseudoReloc : 1;

  // True if we want to allow this symbol to be undefined in the early
  // undefined check pass in SymbolTable::reportUnresolvable(), as it
  // might be fixed up later.
  unsigned deferUndefined : 1;

  // False if LTO shouldn't inline whatever this symbol points to. If a symbol
  // is overwritten after LTO, LTO shouldn't inline the symbol because it
  // doesn't know the final contents of the symbol.
  unsigned canInline : 1;

protected:
  // Symbol name length. Assume symbol lengths fit in a 32-bit integer.
  uint32_t nameSize;

  // Start of the name characters; null until the name is known (see
  // getName()).
  const char *nameData;
};

// The base class for any defined symbols, including absolute symbols,
// etc.
class Defined : public Symbol {
public:
  Defined(Kind k, StringRef n) : Symbol(k, n) {}

  static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; }

  // Returns the RVA (relative virtual address) of this symbol. The
  // writer sets and uses RVAs.
  uint64_t getRVA();

  // Returns the chunk containing this symbol. Absolute symbols and __ImageBase
  // do not have chunks, so this may return null.
  Chunk *getChunk();
};

// Symbols defined via a COFF object file or bitcode file.  For COFF files, this
// stores a coff_symbol_generic*, and names of internal symbols are lazily
// loaded through that. For bitcode files, Sym is nullptr and the name is stored
// as a decomposed StringRef.
class DefinedCOFF : public Defined {
  friend Symbol;

public:
  DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s)
      : Defined(k, n), file(f), sym(s) {}

  static bool classof(const Symbol *s) {
    return s->kind() <= LastDefinedCOFFKind;
  }

  InputFile *getFile() { return file; }

  COFFSymbolRef getCOFFSymbol();

  InputFile *file;

protected:
  const coff_symbol_generic *sym;
};

// Regular defined symbols read from object file symbol tables.
class DefinedRegular : public DefinedCOFF {
public:
  DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT,
                 bool isExternal = false,
                 const coff_symbol_generic *s = nullptr,
                 SectionChunk *c = nullptr)
      : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) {
    this->isExternal = isExternal;
    this->isCOMDAT = isCOMDAT;
  }

  static bool classof(const Symbol *s) {
    return s->kind() == DefinedRegularKind;
  }

  uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; }
  SectionChunk *getChunk() const { return *data; }
  uint32_t getValue() const { return sym->Value; }

  SectionChunk **data;
};

class DefinedCommon : public DefinedCOFF {
public:
  DefinedCommon(InputFile *f, StringRef n, uint64_t size,
                const coff_symbol_generic *s = nullptr,
                CommonChunk *c = nullptr)
      : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) {
    this->isExternal = true;
  }

  static bool classof(const Symbol *s) {
    return s->kind() == DefinedCommonKind;
  }

  uint64_t getRVA() { return data->getRVA(); }
  CommonChunk *getChunk() { return data; }

private:
  friend SymbolTable;
  uint64_t getSize() const { return size; }
  CommonChunk *data;
  uint64_t size;
};

// Absolute symbols.
class DefinedAbsolute : public Defined {
public:
  DefinedAbsolute(StringRef n, COFFSymbolRef s)
      : Defined(DefinedAbsoluteKind, n), va(s.getValue()) {
    isExternal = s.isExternal();
  }

  DefinedAbsolute(StringRef n, uint64_t v)
      : Defined(DefinedAbsoluteKind, n), va(v) {}

  static bool classof(const Symbol *s) {
    return s->kind() == DefinedAbsoluteKind;
  }

  uint64_t getRVA() { return va - config->imageBase; }
  void setVA(uint64_t v) { va = v; }
  uint64_t getVA() const { return va; }

  // Section index relocations against absolute symbols resolve to
  // this 16 bit number, and it is the largest valid section index
  // plus one. This variable keeps it.
  static uint16_t numOutputSections;

private:
  uint64_t va;
};

// This symbol is used for linker-synthesized symbols like __ImageBase and
// __safe_se_handler_table.
class DefinedSynthetic : public Defined {
public:
  explicit DefinedSynthetic(StringRef name, Chunk *c)
      : Defined(DefinedSyntheticKind, name), c(c) {}

  static bool classof(const Symbol *s) {
    return s->kind() == DefinedSyntheticKind;
  }

  // A null chunk indicates that this is __ImageBase. Otherwise, this is some
  // other synthesized chunk, like SEHTableChunk.
  uint32_t getRVA() { return c ? c->getRVA() : 0; }
  Chunk *getChunk() { return c; }

private:
  Chunk *c;
};

// This class represents a symbol defined in an archive file. It is
// created from an archive file header, and it knows how to load an
// object file from an archive to replace itself with a defined
// symbol. If the resolver finds both Undefined and LazyArchive for
// the same name, it will ask the LazyArchive to load a file.
class LazyArchive : public Symbol {
public:
  LazyArchive(ArchiveFile *f, const Archive::Symbol s)
      : Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {}

  static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }

  MemoryBufferRef getMemberBuffer();

  ArchiveFile *file;
  const Archive::Symbol sym;
};

class LazyObject : public Symbol {
public:
  LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {}
  static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
  InputFile *file;
};

// MinGW only.
class LazyDLLSymbol : public Symbol {
public:
  LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n)
      : Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {}
  static bool classof(const Symbol *s) {
    return s->kind() == LazyDLLSymbolKind;
  }

  DLLFile *file;
  DLLFile::Symbol *sym;
};

// Undefined symbols.
class Undefined : public Symbol {
public:
  explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {}

  static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }

  // An undefined symbol can have a fallback symbol which gives an
  // undefined symbol a second chance if it would remain undefined.
  // If it remains undefined, it'll be replaced with whatever the
  // Alias pointer points to.
  Symbol *weakAlias = nullptr;

  // If this symbol is external weak, try to resolve it to a defined
  // symbol by searching the chain of fallback symbols. Returns the symbol if
  // successful, otherwise returns null.
  Defined *getWeakAlias();
};

// Windows-specific classes.

// This class represents a symbol imported from a DLL. This has two
// names for internal use and external use. The former is used for
// name resolution, and the latter is used for the import descriptor
// table in an output. The former has "__imp_" prefix.
class DefinedImportData : public Defined {
public:
  DefinedImportData(StringRef n, ImportFile *f)
      : Defined(DefinedImportDataKind, n), file(f) {
  }

  static bool classof(const Symbol *s) {
    return s->kind() == DefinedImportDataKind;
  }

  uint64_t getRVA() { return file->location->getRVA(); }
  Chunk *getChunk() { return file->location; }
  void setLocation(Chunk *addressTable) { file->location = addressTable; }

  StringRef getDLLName() { return file->dllName; }
  StringRef getExternalName() { return file->externalName; }
  uint16_t getOrdinal() { return file->hdr->OrdinalHint; }

  ImportFile *file;

  // This is a pointer to the synthetic symbol associated with the load thunk
  // for this symbol that will be called if the DLL is delay-loaded. This is
  // needed for Control Flow Guard because if this DefinedImportData symbol is a
  // valid call target, the corresponding load thunk must also be marked as a
  // valid call target.
  DefinedSynthetic *loadThunkSym = nullptr;
};

// This class represents a symbol for a jump table entry which jumps
// to a function in a DLL. Linker are supposed to create such symbols
// without "__imp_" prefix for all function symbols exported from
// DLLs, so that you can call DLL functions as regular functions with
// a regular name. A function pointer is given as a DefinedImportData.
class DefinedImportThunk : public Defined {
public:
  DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine);

  static bool classof(const Symbol *s) {
    return s->kind() == DefinedImportThunkKind;
  }

  uint64_t getRVA() { return data->getRVA(); }
  Chunk *getChunk() { return data; }

  DefinedImportData *wrappedSym;

private:
  Chunk *data;
};

// If you have a symbol "foo" in your object file, a symbol name
// "__imp_foo" becomes automatically available as a pointer to "foo".
// This class is for such automatically-created symbols.
// Yes, this is an odd feature. We didn't intend to implement that.
// This is here just for compatibility with MSVC.
class DefinedLocalImport : public Defined {
public:
  DefinedLocalImport(StringRef n, Defined *s)
      : Defined(DefinedLocalImportKind, n), data(make<LocalImportChunk>(s)) {}

  static bool classof(const Symbol *s) {
    return s->kind() == DefinedLocalImportKind;
  }

  uint64_t getRVA() { return data->getRVA(); }
  Chunk *getChunk() { return data; }

private:
  LocalImportChunk *data;
};

inline uint64_t Defined::getRVA() {
  switch (kind()) {
  case DefinedAbsoluteKind:
    return cast<DefinedAbsolute>(this)->getRVA();
  case DefinedSyntheticKind:
    return cast<DefinedSynthetic>(this)->getRVA();
  case DefinedImportDataKind:
    return cast<DefinedImportData>(this)->getRVA();
  case DefinedImportThunkKind:
    return cast<DefinedImportThunk>(this)->getRVA();
  case DefinedLocalImportKind:
    return cast<DefinedLocalImport>(this)->getRVA();
  case DefinedCommonKind:
    return cast<DefinedCommon>(this)->getRVA();
  case DefinedRegularKind:
    return cast<DefinedRegular>(this)->getRVA();
  case LazyArchiveKind:
  case LazyObjectKind:
  case LazyDLLSymbolKind:
  case UndefinedKind:
    llvm_unreachable("Cannot get the address for an undefined symbol.");
  }
  llvm_unreachable("unknown symbol kind");
}

// Forwards to the getChunk() of the concrete class selected by kind().
// Absolute symbols have no chunk and yield null. The cases are listed in enum
// order; the non-defined kinds must never get here.
inline Chunk *Defined::getChunk() {
  const Kind k = kind();
  switch (k) {
  case DefinedRegularKind:
    return cast<DefinedRegular>(this)->getChunk();
  case DefinedCommonKind:
    return cast<DefinedCommon>(this)->getChunk();
  case DefinedLocalImportKind:
    return cast<DefinedLocalImport>(this)->getChunk();
  case DefinedImportThunkKind:
    return cast<DefinedImportThunk>(this)->getChunk();
  case DefinedImportDataKind:
    return cast<DefinedImportData>(this)->getChunk();
  case DefinedAbsoluteKind:
    return nullptr;
  case DefinedSyntheticKind:
    return cast<DefinedSynthetic>(this)->getChunk();
  case UndefinedKind:
  case LazyArchiveKind:
  case LazyObjectKind:
  case LazyDLLSymbolKind:
    llvm_unreachable("Cannot get the chunk of an undefined symbol.");
  }
  llvm_unreachable("unknown symbol kind");
}

// A buffer class that is large enough to hold any Symbol-derived
// object. We allocate memory using this class and instantiate a symbol
// using the placement new.
//
// Each member is a char array with the size and the alignment of one concrete
// symbol class, which makes the union at least as large and as strictly
// aligned as every class listed. replaceSymbol() statically asserts that the
// type it constructs fits. Nothing in this header refers to the members by
// name.
union SymbolUnion {
  alignas(DefinedRegular) char a[sizeof(DefinedRegular)];
  alignas(DefinedCommon) char b[sizeof(DefinedCommon)];
  alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)];
  alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)];
  alignas(LazyArchive) char e[sizeof(LazyArchive)];
  alignas(Undefined) char f[sizeof(Undefined)];
  alignas(DefinedImportData) char g[sizeof(DefinedImportData)];
  alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)];
  alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)];
  alignas(LazyObject) char j[sizeof(LazyObject)];
  alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)];
};

// Constructs a symbol of type T in the storage `s` points to, forwarding
// `arg` to T's constructor. The object previously living there is overwritten
// without a destructor call.
//
// T must be a Symbol subclass that is trivially destructible and that fits
// into a SymbolUnion with respect to both size and alignment; all of this is
// checked at compile time (the Symbol relationship through the pointer
// conversion inside the assert).
//
// Two flags describe the symbol name rather than one particular definition
// and therefore survive the replacement:
//  - canInline: the Symbol constructor would reset it to true.
//  - isUsedInRegularObj: the Symbol constructor does not initialize it, so
//    its value after the placement new must not be relied upon; it is saved
//    and restored explicitly.
template <typename T, typename... ArgT>
void replaceSymbol(Symbol *s, ArgT &&... arg) {
  static_assert(std::is_trivially_destructible<T>(),
                "Symbol types must be trivially destructible");
  static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
  static_assert(alignof(T) <= alignof(SymbolUnion),
                "SymbolUnion not aligned enough");
  assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
         "Not a Symbol");

  // Remember the flags that have to outlive the old object.
  const bool canInline = s->canInline;
  const bool isUsedInRegularObj = s->isUsedInRegularObj;

  new (s) T(std::forward<ArgT>(arg)...);

  s->canInline = canInline;
  s->isUsedInRegularObj = isUsedInRegularObj;
}
} // namespace coff

} // namespace lld

#endif