initStaticTLS()
|
const tls_variant = switch (native_arch) {
.arm, .armeb, .thumb, .aarch64, .aarch64_be, .riscv32, .riscv64, .mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpcle, .powerpc64, .powerpc64le => TLSVariant.VariantI,
.x86_64, .x86, .sparc64 => TLSVariant.VariantII,
else => @compileError("undefined tls_variant for this architecture"),
};
// Controls how many bytes are reserved for the Thread Control Block
const tls_tcb_size = switch (native_arch) {
// ARM EABI mandates enough space for two pointers: the first one points to
// the DTV while the second one is unspecified but reserved
.arm, .armeb, .thumb, .aarch64, .aarch64_be => 2 * @sizeOf(usize),
// One pointer-sized word that points either to the DTV or the TCB itself
else => @sizeOf(usize),
};
// Controls if the TP points to the end of the TCB instead of its beginning
const tls_tp_points_past_tcb = switch (native_arch) {
.riscv32, .riscv64, .mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpc64, .powerpc64le => true,
else => false,
};
// Some architectures add some offset to the tp and dtv addresses in order to
// make the generated code more efficient
const tls_tp_offset = switch (native_arch) {
.mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpc64, .powerpc64le => 0x7000,
else => 0,
};
const tls_dtv_offset = switch (native_arch) {
.mips, .mipsel, .mips64, .mips64el, .powerpc, .powerpc64, .powerpc64le => 0x8000,
.riscv32, .riscv64 => 0x800,
else => 0,
};
// Per-thread storage for Zig's use
const CustomData = struct {
dummy: usize,
};
// Dynamic Thread Vector
const DTV = extern struct {
entries: usize,
tls_block: [1][*]u8,
};
// Holds all the information about the process TLS image
const TLSImage = struct {
init_data: []const u8,
alloc_size: usize,
alloc_align: usize,
tcb_offset: usize,
dtv_offset: usize,
data_offset: usize,
data_size: usize,
// Only used on the x86 architecture
gdt_entry_number: usize,
};
pub var tls_image: TLSImage = undefined;
pub fn setThreadPointer(addr: usize) void {
switch (native_arch) {
.x86 => {
var user_desc = std.os.linux.user_desc{
.entry_number = tls_image.gdt_entry_number,
.base_addr = addr,
.limit = 0xfffff,
.flags = .{
.seg_32bit = 1,
.contents = 0, // Data
.read_exec_only = 0,
.limit_in_pages = 1,
.seg_not_present = 0,
.useable = 1,
},
};
const rc = std.os.linux.syscall1(.set_thread_area, @intFromPtr(&user_desc));
assert(rc == 0);
const gdt_entry_number = user_desc.entry_number;
// We have to keep track of our slot as it's also needed for clone()
tls_image.gdt_entry_number = gdt_entry_number;
// Update the %gs selector
asm volatile ("movl %[gs_val], %%gs"
:
: [gs_val] "r" (gdt_entry_number << 3 | 3),
);
},
.x86_64 => {
const rc = std.os.linux.syscall2(.arch_prctl, std.os.linux.ARCH.SET_FS, addr);
assert(rc == 0);
},
.aarch64, .aarch64_be => {
asm volatile (
\\ msr tpidr_el0, %[addr]
:
: [addr] "r" (addr),
);
},
.arm, .thumb => {
const rc = std.os.linux.syscall1(.set_tls, addr);
assert(rc == 0);
},
.riscv64 => {
asm volatile (
\\ mv tp, %[addr]
:
: [addr] "r" (addr),
);
},
.mips, .mipsel, .mips64, .mips64el => {
const rc = std.os.linux.syscall1(.set_thread_area, addr);
assert(rc == 0);
},
.powerpc, .powerpcle => {
asm volatile (
\\ mr 2, %[addr]
:
: [addr] "r" (addr),
);
},
.powerpc64, .powerpc64le => {
asm volatile (
\\ mr 13, %[addr]
:
: [addr] "r" (addr),
);
},
.sparc64 => {
asm volatile (
\\ mov %[addr], %%g7
:
: [addr] "r" (addr),
);
},
else => @compileError("Unsupported architecture"),
}
}
fn initTLS(phdrs: []elf.Phdr) void {
var tls_phdr: ?*elf.Phdr = null;
var img_base: usize = 0;
for (phdrs) |*phdr| {
switch (phdr.p_type) {
elf.PT_PHDR => img_base = @intFromPtr(phdrs.ptr) - phdr.p_vaddr,
elf.PT_TLS => tls_phdr = phdr,
else => {},
}
}
var tls_align_factor: usize = undefined;
var tls_data: []const u8 = undefined;
var tls_data_alloc_size: usize = undefined;
if (tls_phdr) |phdr| {
// The effective size in memory is represented by p_memsz, the length of
// the data stored in the PT_TLS segment is p_filesz and may be less
// than the former
tls_align_factor = phdr.p_align;
tls_data = @as([*]u8, @ptrFromInt(img_base + phdr.p_vaddr))[0..phdr.p_filesz];
tls_data_alloc_size = phdr.p_memsz;
} else {
tls_align_factor = @alignOf(usize);
tls_data = &[_]u8{};
tls_data_alloc_size = 0;
}
// Offsets into the allocated TLS area
var tcb_offset: usize = undefined;
var dtv_offset: usize = undefined;
var data_offset: usize = undefined;
// Compute the total size of the ABI-specific data plus our own control
// structures. All the offset calculated here assume a well-aligned base
// address.
const alloc_size = switch (tls_variant) {
.VariantI => blk: {
var l: usize = 0;
dtv_offset = l;
l += @sizeOf(DTV);
// Add some padding here so that the thread pointer (tcb_offset) is
// aligned to p_align and the CustomData structure can be found by
// simply subtracting its @sizeOf from the tp value
const delta = (l + @sizeOf(CustomData)) & (tls_align_factor - 1);
if (delta > 0)
l += tls_align_factor - delta;
l += @sizeOf(CustomData);
tcb_offset = l;
l += mem.alignForward(usize, tls_tcb_size, tls_align_factor);
data_offset = l;
l += tls_data_alloc_size;
break :blk l;
},
.VariantII => blk: {
var l: usize = 0;
data_offset = l;
l += mem.alignForward(usize, tls_data_alloc_size, tls_align_factor);
// The thread pointer is aligned to p_align
tcb_offset = l;
l += tls_tcb_size;
// The CustomData structure is right after the TCB with no padding
// in between so it can be easily found
l += @sizeOf(CustomData);
l = mem.alignForward(usize, l, @alignOf(DTV));
dtv_offset = l;
l += @sizeOf(DTV);
break :blk l;
},
};
tls_image = TLSImage{
.init_data = tls_data,
.alloc_size = alloc_size,
.alloc_align = tls_align_factor,
.tcb_offset = tcb_offset,
.dtv_offset = dtv_offset,
.data_offset = data_offset,
.data_size = tls_data_alloc_size,
.gdt_entry_number = @as(usize, @bitCast(@as(isize, -1))),
};
}
inline fn alignPtrCast(comptime T: type, ptr: [*]u8) *T {
return @ptrCast(@alignCast(ptr));
}
/// Initializes all the fields of the static TLS area and returns the computed
/// architecture-specific value of the thread-pointer register
pub fn prepareTLS(area: []u8) usize {
// Clear the area we're going to use, just to be safe
@memset(area, 0);
// Prepare the DTV
const dtv = alignPtrCast(DTV, area.ptr + tls_image.dtv_offset);
dtv.entries = 1;
dtv.tls_block[0] = area.ptr + tls_dtv_offset + tls_image.data_offset;
// Prepare the TCB
const tcb_ptr = alignPtrCast([*]u8, area.ptr + tls_image.tcb_offset);
tcb_ptr.* = switch (tls_variant) {
.VariantI => area.ptr + tls_image.dtv_offset,
.VariantII => area.ptr + tls_image.tcb_offset,
};
// Copy the data
@memcpy(area[tls_image.data_offset..][0..tls_image.init_data.len], tls_image.init_data);
// Return the corrected value (if needed) for the tp register.
// Overflow here is not a problem, the pointer arithmetic involving the tp
// is done with wrapping semantics.
return @intFromPtr(area.ptr) +% tls_tp_offset +%
if (tls_tp_points_past_tcb) tls_image.data_offset else tls_image.tcb_offset;
}
// The main motivation for the size chosen here is this is how much ends up being
// requested for the thread local variables of the std.crypto.random implementation.
// I'm not sure why it ends up being so much; the struct itself is only 64 bytes.
// I think it has to do with being page aligned and LLVM or LLD is not smart enough
// to lay out the TLS data in a space conserving way. Anyway I think it's fine
// because it's less than 3 pages of memory, and putting it in the ELF like this
// is equivalent to moving the mmap call below into the kernel, avoiding syscall
// overhead.
var main_thread_tls_buffer: [0x2100]u8 align(mem.page_size) = undefined;
pub fn initStaticTLS(phdrs: []elf.Phdr) void {
initTLS(phdrs);
const tls_area = blk: {
// Fast path for the common case where the TLS data is really small,
// avoid an allocation and use our local buffer.
if (tls_image.alloc_align <= mem.page_size and
tls_image.alloc_size <= main_thread_tls_buffer.len)
{
break :blk main_thread_tls_buffer[0..tls_image.alloc_size];
}
const alloc_tls_area = os.mmap(
null,
tls_image.alloc_size + tls_image.alloc_align - 1,
os.PROT.READ | os.PROT.WRITE,
os.MAP.PRIVATE | os.MAP.ANONYMOUS,
-1,
0,
) catch os.abort();
// Make sure the slice is correctly aligned.
const begin_addr = @intFromPtr(alloc_tls_area.ptr);
const begin_aligned_addr = mem.alignForward(usize, begin_addr, tls_image.alloc_align);
const start = begin_aligned_addr - begin_addr;
break :blk alloc_tls_area[start .. start + tls_image.alloc_size];
};
const tp_value = prepareTLS(tls_area);
setThreadPointer(tp_value);
}
|