Mach-Oを力任せに解析&再現、Macの実行ファイルを自作しちゃる!

2025-03-08

自作したリンカーはELFは扱えるがMacOSで使われるMach-O形式には対応できてない。 Mach-Oの実行ファイルも自分で生成できるようになればリンカーにも適用できて嬉しい。 なので調べてみようという試み。

環境:Apple M1(aarch64), macOS Sequoia 15.3.1, Apple clang version 16.0.0

$ gcc --version
Apple clang version 16.0.0 (clang-1600.0.26.6)
Target: arm64-apple-darwin24.3.0
Thread model: posix
InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin

概要

Mach-Oはヘッダとロードコマンドで構成されることは以前調べた通りで、 実行ファイルではヘッダのfiletypeがMH_EXECUTE(=2)となっている。

実行ファイルでの注意点としてMacでは静的リンクは許されていないとのこと。 なので実行ファイルも単独で完結できず、標準ライブラリとの動的リンクの情報が必要となる。

Mach-O実行形式を調べてみる

C言語でハローワールドするコードをgccでコンパイルして出力された実行ファイルのバイナリを見てみた。 出力されたバイナリに含まれるロードコマンドはセグメント、シンボルテーブルなどなど。 まずはそのバイナリと同じ内容を出力するコードを作成して、構造体の内容を計算するように変えていった。

内容をいじれるようにする

そこから省いても動くロードコマンドを削除しようとしたが、なにをいじっても動かなくなる。 最初全然わからなかったのだけど「コードシグネチャ」と整合してないとまずいということに気づいた。

ちゃんとしたコードシグネチャを作るにはApple Developer登録して証明書を作成して、、、とか必要らしいがそんな面倒なことはやってられない。 作成した実行ファイルを配布するわけじゃなければ、アドホックなコード署名なら以下でできるらしい:

$ codesign -s - -i - -f a.out

コード

そんな感じで、Hello, world!を出力するMach-O実行ファイルを再構成するコードを作ってみた、以下部分ごとに。 コード全体はgist

インクルード、マクロ定義

#include <mach-o/compact_unwind_encoding.h>
#include <mach-o/fixup-chains.h>
#include <mach-o/ldsyms.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Number of elements in a true array (not valid for pointers or array parameters).
#define ARRAYSIZE(a) (sizeof(a) / sizeof((a)[0]))
// Round x up to the next multiple of a; a must be a power of two.
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
// Page granularity used for the segment layout in this generator (16 KiB).
#define PAGE_SIZE 0x4000

// Uncomment to also emit the symbol table (LC_SYMTAB / LC_DYSYMTAB and their data).
// #define OUTPUT_SYMBTAB
  • シンボルテーブルは出力しなくても動作するので、切り替えられるようにした

マシンコード、データ

// AArch64 machine code for _main, emitted into __TEXT,__text.
// The addresses in the disassembly comments are for the layout where __text
// starts at 0x100003f6c.
// NOTE(review): the adrp/add/bl immediates are pre-resolved for that layout and
// are not recomputed by the generator -- confirm before changing any section size.
static const uint32_t text_code[] = {
0xa9bf7bfd, // 0000000100003f6c stp x29, x30, [sp, #-0x10]!
0x910003fd, // 0000000100003f70 mov x29, sp
0x90000000, // 0000000100003f74 adrp x0, 0 ; 0x100003000
0x913e6000, // 0000000100003f78 add x0, x0, #0xf98 ; literal pool for: "Hello, world!"
0x94000004, // 0000000100003f7c bl 0x100003f8c ; symbol stub for: _puts
0x52800000, // 0000000100003f80 mov w0, #0x0
0xa8c17bfd, // 0000000100003f84 ldp x29, x30, [sp], #0x10
0xd65f03c0, // 0000000100003f88 ret
};
// Stub for _puts, emitted into __TEXT,__stubs: loads a pointer from the start of
// the next 4 KiB page and branches to it.
// NOTE(review): presumably that address is the __got slot at 0x100004000 -- confirm.
static const uint32_t text_stub[] = {
0xb0000010, // 0000000100003f8c adrp x16, 1 ; 0x1000
0xf9400210, // 0000000100003f90 ldr x16, [x16]
0xd61f0200, // 0000000100003f94 br x16
};
// String passed to puts, emitted into __TEXT,__cstring (sizeof includes the NUL).
static const char text_cstring[] = "Hello, world!";
  • aarch64のマシンコードと、ライブラリ関数_puts呼び出し用に自動生成されたスタブ

アンワインド、GOT

struct text_unwind_info_t {
struct unwind_info_section_header header;
struct unwind_info_section_header_index_entry index_entries[2];
uint64_t reserved1;
struct unwind_info_compressed_second_level_page_header second_level_page_header;
uint32_t second_level_entry_pages[1];
uint32_t second_level_encodings_pages[1];
} static const text_unwind_info = {
.header = {
.version = UNWIND_SECTION_VERSION,
.commonEncodingsArraySectionOffset = 0,
.commonEncodingsArrayCount = 0x00000000,
.personalityArraySectionOffset = 0,
.personalityArrayCount = 0x00000000,
.indexSectionOffset = offsetof(struct text_unwind_info_t, index_entries) + sizeof(struct unwind_info_section_header_index_entry) * 0,
.indexCount = 0x00000002,
},
.index_entries = {
{
.functionOffset = 0x00003f6c,
.secondLevelPagesSectionOffset = offsetof(struct text_unwind_info_t, second_level_page_header), // <-- 0にしても動く
.lsdaIndexArraySectionOffset = offsetof(struct text_unwind_info_t, second_level_page_header), // <-- 0にしても動く
},
{
.functionOffset = 0x00003f8c,
.secondLevelPagesSectionOffset = 0x00000000,
.lsdaIndexArraySectionOffset = offsetof(struct text_unwind_info_t, second_level_page_header), // <-- 0にしても動く
},
},
.second_level_page_header = {
.kind = UNWIND_SECOND_LEVEL_COMPRESSED,
.entryPageOffset = offsetof(struct text_unwind_info_t, second_level_entry_pages) - offsetof(struct text_unwind_info_t, second_level_page_header),
.entryCount = 0x01,
.encodingsPageOffset = offsetof(struct text_unwind_info_t, second_level_encodings_pages) - offsetof(struct text_unwind_info_t, second_level_page_header),
.encodingsCount = 0x01,
},
.second_level_entry_pages = {
0x00000000,
},
.second_level_encodings_pages = {
UNWIND_ARM_MODE_DWARF | 0x000000,
},
};
// Initial contents of __DATA_CONST,__got: a single 64-bit slot.
// NOTE(review): with DYLD_CHAINED_PTR_64_OFFSET, bit 63 set presumably marks the
// slot as a bind to import #0 (_puts) with no further fixup in the chain --
// confirm against <mach-o/fixup-chains.h>.
static const uint64_t data_const_got[] = {
0x8000000000000000, // slot at file offset 0x004000
};
  • 正直、内容よくわかってない…
  • Cで作成したコードにアンワインドとか必要なのか?と思うんだけど、ないと動かない
  • GOT (Global Offset Table)

間接シンボル、シンボル文字列

#ifdef OUTPUT_SYMBTAB
// Indirect symbol table: each entry is an index into the symbol table
// (index 2 is _puts).
// NOTE(review): there are presumably two entries because two sections refer to
// this table -- __stubs via reserved1 = 0 and __got via reserved1 = 1 -- confirm.
static const uint32_t indirectsym[] = {
2,
2,
};
// String table for the symbol names; starts with an empty string and is
// zero-padded up to the fixed 0x28-byte array size.
static const char symstring[0x28] =
"\0"
_MH_EXECUTE_SYM "\0"
"_main\0"
"_puts\0";
#endif
  • 間接シンボルの2は_putsを指している(なぜ2つあるのかは不明)
  • シンボル名はnlist_64構造体の.n_un.n_strxによってオフセットで参照される

動的リンカー関連

// Path strings written directly after dylinker_command / dylib_command.
// The full (zero-padded) array size is added to the command's cmdsize.
// NOTE(review): the sizes look chosen so each cmdsize comes out as a multiple
// of 8 (0x20 and 0x38 in the generated file) -- confirm against the struct sizes.
static const char dylinker_name[0x14] = "/usr/lib/dyld";
static const char loaddylib_name[0x20] = "/usr/lib/libSystem.B.dylib";
  • 不明

0埋め用ユーティリティ関数

// Pads the stream with zero bytes until its position reaches `offset`.
//
// fp:     output stream; it must support ftell (a regular file, not a pipe).
// offset: target absolute position in bytes.
//
// Does nothing when the stream is already at or past `offset`.
// Terminates the program if the position cannot be determined or a write fails,
// because every later blob is placed by absolute file offset and a silently
// skipped padding run would produce a corrupt image.
void put_padding(FILE *fp, size_t offset) {
    static const unsigned char zeros[256];  // zero-initialized source for padding

    long pos = ftell(fp);
    if (pos < 0) {
        // ftell returns -1 on failure; it must not be converted to size_t unchecked.
        perror("put_padding: ftell");
        exit(EXIT_FAILURE);
    }
    if ((size_t)pos >= offset)
        return;

    size_t remaining = offset - (size_t)pos;
    while (remaining > 0) {
        size_t chunk = remaining < sizeof(zeros) ? remaining : sizeof(zeros);
        if (fwrite(zeros, 1, chunk, fp) != chunk) {
            perror("put_padding: fwrite");
            exit(EXIT_FAILURE);
        }
        remaining -= chunk;
    }
}

メイン関数:変数宣言、ロードコマンド

int main(void) {
// Mach-O header
struct mach_header_64 header;
// Section headers, one array per segment; segments 0 and 3 have no sections.
// NOTE(review): zero-length arrays are a compiler extension, not ISO C.
struct section_64 section0s[0];
struct section_64 section1s[4];
struct section_64 section2s[1];
struct section_64 section3s[0];
struct section_64* sections[] = {section0s, section1s, section2s, section3s};
size_t section_sizes[] = {sizeof(section0s), sizeof(section1s), sizeof(section2s), sizeof(section3s)};
// Load commands (one segment command per entry of `sections`)
struct segment_command_64 segmentcmds[ARRAYSIZE(sections)];
struct linkedit_data_command dyldchainedfixupcmd;
#ifdef OUTPUT_SYMBTAB
struct symtab_command symtabcmd;
struct dysymtab_command dysymtabcmd;
#endif
struct dylinker_command loaddylinkercmd;
struct entry_point_command entrypointcmd;
struct dylib_command loaddyldcmd;

// One entry per load command, in output order: the fixed-size command struct
// plus optional trailing data (section headers or a path string) that is
// written immediately after it. Entries without trailing data leave
// extra_data / extra_size zero-initialized.
struct commanddata {
void *command;
size_t size;
void *extra_data;
size_t extra_size;
} const load_commands[] = {
{&segmentcmds[0], sizeof(segmentcmds[0]), section0s, sizeof(section0s)},
{&segmentcmds[1], sizeof(segmentcmds[1]), section1s, sizeof(section1s)},
{&segmentcmds[2], sizeof(segmentcmds[2]), section2s, sizeof(section2s)},
{&segmentcmds[3], sizeof(segmentcmds[3]), section3s, sizeof(section3s)},
{&dyldchainedfixupcmd, sizeof(dyldchainedfixupcmd)},
#ifdef OUTPUT_SYMBTAB
{&symtabcmd, sizeof(symtabcmd)},
{&dysymtabcmd, sizeof(dysymtabcmd)},
#endif
{&loaddylinkercmd, sizeof(loaddylinkercmd), (void*)dylinker_name, sizeof(dylinker_name)},
{&entrypointcmd, sizeof(entrypointcmd)},
{&loaddyldcmd, sizeof(loaddyldcmd), (void*)loaddylib_name, sizeof(loaddylib_name)},
};

// Total byte size of all load commands including their trailing data; it is
// computed first because the header field and the __text placement depend on it.
size_t sizeofcmds = 0;
for (size_t i = 0; i < ARRAYSIZE(load_commands); ++i) {
const struct commanddata *cmd = &load_commands[i];
sizeofcmds += cmd->size + cmd->extra_size;
}
  • ロードコマンド:セグメントx4と動的リンク関連など、合計8個
  • ロードコマンドすべての合計サイズを先に計算

セクション情報

// int main(void) {
// Section layout: compute each section's file offset and fill in its header.
// Virtual addresses are vmaddr + file offset throughout.
uint64_t vmaddr = 0x100000000;

const uint32_t text_start_off = 0;
// __cstring is padded to 8 bytes so that __unwind_info starts 8-byte aligned.
const uint64_t text_total_size = sizeof(text_code) + sizeof(text_stub) + ALIGN(sizeof(text_cstring), 8) + sizeof(text_unwind_info);
// The __TEXT sections are packed against the end of the page(s) holding the
// header and load commands, so the last section ends on a page boundary.
// NOTE(review): this assumes header + load commands + text_total_size fit
// in the page-aligned header area; otherwise the sections would overlap the
// load commands.
const uint32_t text_code_off = ALIGN(text_start_off + sizeof(header) + sizeofcmds, PAGE_SIZE) - text_total_size;
// Entry point is the first instruction of __text.
const uint64_t entryoff = text_code_off + 0; // TODO
section1s[0] = (struct section_64){
.sectname = SECT_TEXT,
.segname = SEG_TEXT,
.addr = vmaddr + text_code_off,
.size = sizeof(text_code),
.offset = text_code_off,
.align = 2, // 2^2
.reloff = 0, //reloc_start_off,
.nreloc = 0, //sizeof(relocs) / sizeof(*relocs),
.flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS,
};
const uint32_t text_stub_off = text_code_off + sizeof(text_code);
section1s[1] = (struct section_64){
.sectname = "__stubs",
.segname = SEG_TEXT,
.addr = vmaddr + text_stub_off,
.size = sizeof(text_stub),
.offset = text_stub_off,
.align = 2, // 2^2
.reloff = 0, //reloc_start_off,
.nreloc = 0, //sizeof(relocs) / sizeof(*relocs),
.flags = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS | S_SYMBOL_STUBS,
.reserved1 = 0x00000000, // <-- Indirect Sym Index?
.reserved2 = sizeof(text_stub), // <-- size (there is only one stub, so this is also the per-stub size)
};
const uint32_t text_cstring_off = text_stub_off + sizeof(text_stub);
section1s[2] = (struct section_64){
.sectname = "__cstring",
.segname = SEG_TEXT,
.addr = vmaddr + text_cstring_off,
.size = sizeof(text_cstring),
.offset = text_cstring_off,
.align = 0, // 2^0
.reloff = 0, //reloc_start_off,
.nreloc = 0, //sizeof(relocs) / sizeof(*relocs),
.flags = S_CSTRING_LITERALS,
};
const uint32_t text_unwind_info_off = text_cstring_off + ALIGN(sizeof(text_cstring), 8);
section1s[3] = (struct section_64){
.sectname = "__unwind_info",
.segname = SEG_TEXT,
.addr = vmaddr + text_unwind_info_off,
.size = sizeof(text_unwind_info),
.offset = text_unwind_info_off,
.align = 2, // 2^2
.reloff = 0, //reloc_start_off,
.nreloc = 0, //sizeof(relocs) / sizeof(*relocs),
.flags = 0,
};

// __DATA_CONST starts on the first page boundary after the __TEXT sections.
const uint32_t data_start_off = ALIGN(text_code_off + text_total_size, PAGE_SIZE);
const uint64_t data_total_size = sizeof(data_const_got);
const uint32_t data_const_got_off = data_start_off;
section2s[0] = (struct section_64){
.sectname = "__got",
.segname = "__DATA_CONST",
.addr = vmaddr + data_const_got_off,
.size = sizeof(data_const_got),
.offset = data_const_got_off,
.align = 3, // 2^3
.reloff = 0, //reloc_start_off,
.nreloc = 0, //sizeof(relocs) / sizeof(*relocs),
.flags = S_NON_LAZY_SYMBOL_POINTERS,
.reserved1 = 0x00000001, // ? (presumably the index into the indirect symbol table -- confirm)
};

// __LINKEDIT starts on the first page boundary after __DATA_CONST.
const uint32_t linkedit_start_off = ALIGN(data_start_off + data_total_size, PAGE_SIZE);
  • セグメントごとのセクション情報:
    1. ゼロページ __PAGEZERO
      • 仮想アドレス空間の確保のみで、内容は無し
    2. テキスト __TEXT
      • __text:aarch64のマシンコード
      • __stubs:printfがコンパイルによってputs呼び出しに変換され、その動的解決用に自動生成されたもの
      • __cstring:文字列
      • __unwind_info:アンワインド情報(内容不明)
    3. データ __DATA_CONST
      • __got:GOT(内容不明)
    4. リンクエディット __LINKEDIT
      • セクションは無し
  • セクションの中身が出力するファイル中のどこに配置されているかのオフセットを計算
    • 実行時の仮想アドレスもそれに対応している
    • ファイル内でもページ単位に配置する必要があるためか、テキストセクションはMach-Oヘッダを含む先頭ページに配置されている
  • リンクエディットは動的リンクに使われるっぽい

チェインドフィックスアップ

// int main(void) {
// Payload of LC_DYLD_CHAINED_FIXUPS, written at the start of __LINKEDIT.
// All *_offset fields are derived with offsetof() from this struct's layout.
struct dyld_chained_fixups_t {
struct dyld_chained_fixups_header header;
// uint32_t reserved1; // padding
struct { // struct dyld_chained_starts_in_image
uint32_t seg_count;
uint32_t seg_info_offset[ALIGN(4, 2)];
} starts;
struct dyld_chained_starts_in_segment starts_in_segment;
struct dyld_chained_import imports[1];
// uint32_t reserved2; // padding
char symbol[8];
} const dyld_chained_fixups = {
.header = {
.fixups_version = 0x00000000,
.starts_offset = offsetof(struct dyld_chained_fixups_t, starts),
.imports_offset = offsetof(struct dyld_chained_fixups_t, imports),
.symbols_offset = offsetof(struct dyld_chained_fixups_t, symbol),
.imports_count = 0x00000001,
.imports_format = DYLD_CHAINED_IMPORT,
.symbols_format = 0x00000000, // Uncompressed
},
.starts = { // struct dyld_chained_starts_in_image
.seg_count = 4,
// One offset per segment, relative to the start of `starts`; only segment
// [2] (__DATA_CONST) gets a non-zero offset.
// NOTE(review): 0 presumably means "no fixups in this segment" -- confirm.
{
0x00000000,
0x00000000,
offsetof(struct dyld_chained_fixups_t, starts_in_segment) - offsetof(struct dyld_chained_fixups_t, starts),
0x00000000,
},
},
.starts_in_segment = {
.size = sizeof(((struct dyld_chained_fixups_t*)NULL)->starts_in_segment),
.page_size = PAGE_SIZE,
.pointer_format = DYLD_CHAINED_PTR_64_OFFSET,
.segment_offset = data_start_off,
.max_valid_pointer = 0x00000000,
// Number of pages covered by the data segment, rounded up.
.page_count = (data_total_size + (PAGE_SIZE - 1)) / PAGE_SIZE,
// NOTE(review): presumably the offset of the first fixup within the page
// (the __got slot sits at the very start of the segment) -- confirm.
.page_start = {0x0000},
},
.imports = {
// name_offset = 1 skips the leading NUL of `symbol` below.
{.lib_ordinal = 1, .weak_import = 0, .name_offset = 1}, // _puts
},
.symbol = "\0_puts",
};
  • 正確な内容は把握してないが、実行に関する動的シンボルの解決はこいつで行われるっぽい
    • なのでシンボルテーブルや動的シンボルテーブルコマンドは別に必須ではない
  • セグメント4つのうち、フィックスアップの対象になっているのは[2]のデータセグメントのみ
    • データセグメントの内容はGOTで、_putsをどう解決するか示していると推測

シンボルテーブル

// int main(void) {
#ifdef OUTPUT_SYMBTAB
// Offset of each name inside symstring; the string table starts with an empty
// string, and every name is followed by its terminating NUL.
const uint32_t null_nameofs = 0;
const uint32_t mh_execute_header_nameofs = null_nameofs + strlen("") + 1;
const uint32_t main_nameofs = mh_execute_header_nameofs + strlen(_MH_EXECUTE_SYM) + 1;
const uint32_t puts_nameofs = main_nameofs + strlen("_main") + 1;
// Symbol counts per group; the table below is ordered local, external-defined, undefined.
const uint32_t nlocalsym = 0;
const uint32_t nextdefsym = 2; // __mh_execute_header, _main
const uint32_t nundefsym = 1; // _puts
const struct nlist_64 symbols[] = {
    {   // __mh_execute_header: defined at the image base
        .n_un.n_strx = mh_execute_header_nameofs,
        .n_type = N_SECT | N_EXT,
        .n_sect = 0x01,
        .n_desc = REFERENCED_DYNAMICALLY, // 0x0010
        .n_value = vmaddr,
    },
    {   // _main: defined at the start of __text (section 1)
        .n_un.n_strx = main_nameofs,
        .n_type = N_SECT | N_EXT,
        .n_sect = 0x01,
        .n_desc = 0x0000,
        .n_value = vmaddr + text_code_off,
    },
    {   // _puts: undefined here, resolved from a dynamic library
        .n_un.n_strx = puts_nameofs,
        .n_type = N_UNDF | N_EXT,
        .n_sect = NO_SECT,
        // The high byte of n_desc is the library ordinal; ordinal 1 is the first
        // LC_LOAD_DYLIB command (libSystem), not the dynamic linker.
        .n_desc = (1 << 8) | 0,
    },
};
#endif
  • 必須ではないが念の為
  • 定義済みシンボルは__mh_execute_headerと_mainの2つ
  • _putsは動的ライブラリのものを参照するので、未定義シンボル
  • nlist_64構造体のn_descの内容がよくわかってない

ロードコマンドとヘッダの内容構築

// int main(void) {
// Fill in the load commands and the Mach-O header from the offsets computed above.
segmentcmds[0] = (struct segment_command_64){ // __PAGEZERO
.cmd = LC_SEGMENT_64,
.cmdsize = sizeof(segmentcmds[0]) + sizeof(section0s),
.segname = SEG_PAGEZERO,
.vmaddr = 0,
.vmsize = vmaddr, // reserves everything below the image base; no file contents
.fileoff = 0,
.filesize = 0,
.maxprot = VM_PROT_NONE, // ---
.initprot = VM_PROT_NONE, // ---
.nsects = ARRAYSIZE(section0s),
.flags = 0,
};
// NOTE(review): __TEXT starts at file offset 0, so it also covers the header and
// load commands; ALIGN(text_total_size, PAGE_SIZE) matches the real extent only
// while everything fits in the same number of pages -- confirm if sizes grow.
segmentcmds[1] = (struct segment_command_64){ // __TEXT
.cmd = LC_SEGMENT_64,
.cmdsize = sizeof(segmentcmds[1]) + sizeof(section1s),
.segname = SEG_TEXT,
.vmaddr = vmaddr + text_start_off,
.vmsize = ALIGN(text_total_size, PAGE_SIZE),
.fileoff = text_start_off,
.filesize = ALIGN(text_total_size, PAGE_SIZE), // aligned just in case; may work without it?
.maxprot = VM_PROT_EXECUTE | VM_PROT_READ,
.initprot = VM_PROT_EXECUTE | VM_PROT_READ,
.nsects = ARRAYSIZE(section1s),
.flags = 0,
};
segmentcmds[2] = (struct segment_command_64){ // __DATA_CONST
.cmd = LC_SEGMENT_64,
.cmdsize = sizeof(segmentcmds[2]) + sizeof(section2s),
.segname = "__DATA_CONST",
.vmaddr = vmaddr + data_start_off,
.vmsize = ALIGN(data_total_size, PAGE_SIZE),
.fileoff = data_start_off,
.filesize = ALIGN(data_total_size, PAGE_SIZE), // aligned just in case; may work without it?
.maxprot = VM_PROT_WRITE | VM_PROT_READ,
.initprot = VM_PROT_WRITE | VM_PROT_READ,
.nsects = ARRAYSIZE(section2s),
.flags = SG_READ_ONLY,
};
// __LINKEDIT holds the chained-fixups payload and, optionally, the symbol tables.
uint64_t linkedit_total_size = sizeof(dyld_chained_fixups);
#ifdef OUTPUT_SYMBTAB
linkedit_total_size += sizeof(symbols) + sizeof(indirectsym) + sizeof(symstring);
#endif
segmentcmds[3] = (struct segment_command_64){ // __LINKEDIT
.cmd = LC_SEGMENT_64,
.cmdsize = sizeof(segmentcmds[3]) + sizeof(section3s),
.segname = SEG_LINKEDIT,
.vmaddr = vmaddr + linkedit_start_off,
.vmsize = ALIGN(linkedit_total_size, PAGE_SIZE),
.fileoff = linkedit_start_off,
.filesize = linkedit_total_size, // exact size; the file ends with this segment's data
.maxprot = VM_PROT_READ,
.initprot = VM_PROT_READ,
.nsects = ARRAYSIZE(section3s),
.flags = 0,
};
// The __LINKEDIT blobs are laid out back to back: fixups, then (optionally)
// the symbol table, the indirect symbol table and the string table.
const uint32_t dyld_chained_fixups_off = linkedit_start_off;
dyldchainedfixupcmd = (struct linkedit_data_command){
.cmd = LC_DYLD_CHAINED_FIXUPS,
.cmdsize = sizeof(dyldchainedfixupcmd),
.dataoff = dyld_chained_fixups_off,
.datasize = sizeof(dyld_chained_fixups),
};
const uint32_t symtab_off = dyld_chained_fixups_off + sizeof(dyld_chained_fixups);
#ifdef OUTPUT_SYMBTAB
const uint32_t indirectsym_off = symtab_off + sizeof(symbols);
const uint32_t symstring_off = indirectsym_off + sizeof(indirectsym);
symtabcmd = (struct symtab_command){
.cmd = LC_SYMTAB,
.cmdsize = sizeof(symtabcmd),
.symoff = symtab_off,
.nsyms = ARRAYSIZE(symbols),
.stroff = symstring_off,
.strsize = sizeof(symstring),
};
// Symbol index ranges follow the table order: locals, external-defined, undefined.
dysymtabcmd = (struct dysymtab_command){
.cmd = LC_DYSYMTAB,
.cmdsize = sizeof(dysymtabcmd),
.ilocalsym = 0,
.nlocalsym = nlocalsym,
.iextdefsym = 0 + nlocalsym,
.nextdefsym = nextdefsym,
.iundefsym = 0 + nlocalsym + nextdefsym,
.nundefsym = nundefsym,
.tocoff = 0x00000000,
.ntoc = 0x00000000,
.modtaboff = 0x00000000,
.nmodtab = 0x00000000,
.extrefsymoff = 0x00000000,
.nextrefsyms = 0x00000000,
.indirectsymoff = indirectsym_off,
.nindirectsyms = ARRAYSIZE(indirectsym),
.extreloff = 0x00000000,
.nextrel = 0x00000000,
.locreloff = 0x00000000,
.nlocrel = 0x00000000,
};
#endif
// The path string is written right after the struct, so its offset within the
// command equals the struct size.
loaddylinkercmd = (struct dylinker_command){
.cmd = LC_LOAD_DYLINKER,
.cmdsize = sizeof(loaddylinkercmd) + sizeof(dylinker_name),
.name = sizeof(loaddylinkercmd),
};
// entryoff is a file offset (from the start of the image), not a virtual address.
entrypointcmd = (struct entry_point_command){
.cmd = LC_MAIN,
.cmdsize = sizeof(entrypointcmd),
.entryoff = entryoff,
.stacksize = 0x0000000000000000,
};
loaddyldcmd = (struct dylib_command){
.cmd = LC_LOAD_DYLIB,
.cmdsize = sizeof(loaddyldcmd) + sizeof(loaddylib_name),
// Positional initializer for the embedded struct dylib; the values were
// copied from the reference binary.
{
.name = sizeof(struct dylib_command),
.timestamp = 0x00000002,
.current_version = 0x05470000,
.compatibility_version = 0x00010000,
},
};

header = (struct mach_header_64){
.magic = MH_MAGIC_64,
.cputype = CPU_TYPE_ARM64,
.cpusubtype = 0,
.filetype = MH_EXECUTE,
.ncmds = ARRAYSIZE(load_commands),
.sizeofcmds = sizeofcmds,
.flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL | MH_PIE,
};
  • 各セグメントの.filesizeはページサイズ(0x4000 = 16384バイト)にアライメントしなくても動くっぽい
  • 動的リンカー関連はよくわからず…
  • エントリポイントコマンドではユーザコードの開始関数である_main関数を指すようentryoffを指定
    • しかし実行時の仮想アドレスじゃなくファイル内のオフセットなのが謎

標準出力に書き出す

// int main(void) {
FILE *fp = stdout;

fwrite(&header, sizeof(header), 1, fp);
for (size_t i = 0; i < ARRAYSIZE(load_commands); ++i) {
const struct commanddata *cmd = &load_commands[i];
fwrite(cmd->command, cmd->size, 1, fp);
if (cmd->extra_data != NULL)
fwrite(cmd->extra_data, cmd->extra_size, 1, fp);
}

struct {
const void *data;
size_t offset;
size_t size;
} const table[] = {
{ text_code, text_code_off, sizeof(text_code) },
{ text_stub, text_stub_off, sizeof(text_stub) },
{ text_cstring, text_cstring_off, sizeof(text_cstring) },
{ &text_unwind_info, text_unwind_info_off, sizeof(text_unwind_info) },
{ data_const_got, data_const_got_off, sizeof(data_const_got) },
{ &dyld_chained_fixups, dyldchainedfixupcmd.dataoff, sizeof(dyld_chained_fixups) },
#ifdef OUTPUT_SYMBTAB
{ symbols, symtabcmd.symoff, sizeof(symbols) },
{ indirectsym, dysymtabcmd.indirectsymoff, sizeof(indirectsym) },
{ symstring, symstring_off, sizeof(symstring) },
#endif
};

for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); ++i) {
put_padding(fp, table[i].offset);
fwrite(table[i].data, table[i].size, 1, fp);
}
}
  • ファイル内のオフセットをあらかじめ計算しているので、それに合わせて順に書き出すのみ

あれこれ

  • loaddyldcmdとloaddylib_nameがなくても実行自体は可能っぽい
    • しかしnm -x hello-machoとするとLLVM ERROR: bad chained fixups: import #0 bad library ordinal: 1というエラーが出る
  • 元のgccからの出力ではもっと色々なロードコマンドがあるが、省いても動いた:
    • uuid_command
    • build_version_command
    • source_version_command
    • linkedit_data_command
      • LC_DATA_IN_CODE
      • LC_FUNCTION_STARTS
      • LC_DYLD_EXPORTS_TRIE
      • LC_CODE_SIGNATURE

締め

  • いろいろ推測で書いてるので、実際にはいろいろ間違ってる可能性はある
    • 本家の資料が見つからないってどうなの…
  • GOTやアンワインド、動的リンクがわかってないので、リンカーに実装するにはまだ壁がある

動かし方

$ gcc gen-macho-exe.c    # 実行ファイル生成
$ ./a.out > hello-macho # Mach-Oの実行形式出力
$ codesign -s - -i - -f hello-macho # アドホックなコード署名
$ chmod 755 hello-macho # 実行権限を付加
$ ./hello-macho # 実行
Hello, world!

出力されるファイル内容

# hello-macho
00000000: cf fa ed fe 0c 00 00 01 00 00 00 00 02 00 00 00 ................
00000010: 08 00 00 00 30 03 00 00 85 00 20 00 00 00 00 00 ....0..... .....
# struct mach_header_64
# .magic: 0xfeedfacf,
# .cputype: 0x0100000c,
# .cpusubtype: 0x00000000,
# .filetype: 0x00000002,
# .ncmds: 0x00000008,
# .sizeofcmds: 0x00000330,
# .flags: 0x00200085,

00000020: 19 00 00 00 48 00 00 00 5f 5f 50 41 47 45 5a 45 ....H...__PAGEZE
00000030: 52 4f 00 00 00 00 00 00 00 00 00 00 00 00 00 00 RO..............
00000040: 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................
00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000060: 00 00 00 00 00 00 00 00 ........
# struct segment_command_64
# .cmd: LC_SEGMENT_64 (0x19),
# .cmdsize: 0x00000048,
# .segname: "__PAGEZERO",
# .vmaddr: 0x0000000000000000,
# .vmsize: 0x0000000100000000,
# .fileoff: 0x0000000000000000,
# .filesize: 0x0000000000000000,
# .maxprot: 0x00000000,
# .initprot: 0x00000000,
# .nsects: 0x00000000,
# .flags: 0x00000000,

00000068: 19 00 00 00 88 01 00 00 ........
00000070: 5f 5f 54 45 58 54 00 00 00 00 00 00 00 00 00 00 __TEXT..........
00000080: 00 00 00 00 01 00 00 00 00 40 00 00 00 00 00 00 .........@......
00000090: 00 00 00 00 00 00 00 00 00 40 00 00 00 00 00 00 .........@......
000000a0: 05 00 00 00 05 00 00 00 04 00 00 00 00 00 00 00 ................
# struct segment_command_64
# .cmd: LC_SEGMENT_64 (0x19),
# .cmdsize: 0x00000188,
# .segname: "__TEXT",
# .vmaddr: 0x0000000100000000,
# .vmsize: 0x0000000000004000,
# .fileoff: 0x0000000000000000,
# .filesize: 0x0000000000004000,
# .maxprot: 0x00000005,
# .initprot: 0x00000005,
# .nsects: 0x00000004,
# .flags: 0x00000000,

000000b0: 5f 5f 74 65 78 74 00 00 00 00 00 00 00 00 00 00 __text..........
000000c0: 5f 5f 54 45 58 54 00 00 00 00 00 00 00 00 00 00 __TEXT..........
000000d0: 6c 3f 00 00 01 00 00 00 20 00 00 00 00 00 00 00 l?...... .......
000000e0: 6c 3f 00 00 02 00 00 00 00 00 00 00 00 00 00 00 l?..............
000000f0: 00 04 00 80 00 00 00 00 00 00 00 00 00 00 00 00 ................
# struct section_64
# .sectname: "__text",
# .segname: "__TEXT",
# .addr: 0x0000000100003f6c,
# .size: 0x0000000000000020,
# .offset: 0x00003f6c,
# .align: 0x00000002,
# .reloff: 0x00000000,
# .nreloc: 0x00000000,
# .flags: 0x80000400,

00000100: 5f 5f 73 74 75 62 73 00 00 00 00 00 00 00 00 00 __stubs.........
00000110: 5f 5f 54 45 58 54 00 00 00 00 00 00 00 00 00 00 __TEXT..........
00000120: 8c 3f 00 00 01 00 00 00 0c 00 00 00 00 00 00 00 .?..............
00000130: 8c 3f 00 00 02 00 00 00 00 00 00 00 00 00 00 00 .?..............
00000140: 08 04 00 80 00 00 00 00 0c 00 00 00 00 00 00 00 ................
# struct section_64
# .sectname: "__stubs",
# .segname: "__TEXT",
# .addr: 0x0000000100003f8c,
# .size: 0x000000000000000c,
# .offset: 0x00003f8c,
# .align: 0x00000002,
# .reloff: 0x00000000,
# .nreloc: 0x00000000,
# .flags: 0x80000408,

00000150: 5f 5f 63 73 74 72 69 6e 67 00 00 00 00 00 00 00 __cstring.......
00000160: 5f 5f 54 45 58 54 00 00 00 00 00 00 00 00 00 00 __TEXT..........
00000170: 98 3f 00 00 01 00 00 00 0e 00 00 00 00 00 00 00 .?..............
00000180: 98 3f 00 00 00 00 00 00 00 00 00 00 00 00 00 00 .?..............
00000190: 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
# struct section_64
# .sectname: "__cstring",
# .segname: "__TEXT",
# .addr: 0x0000000100003f98,
# .size: 0x000000000000000e,
# .offset: 0x00003f98,
# .align: 0x00000000,
# .reloff: 0x00000000,
# .nreloc: 0x00000000,
# .flags: 0x00000002,

000001a0: 5f 5f 75 6e 77 69 6e 64 5f 69 6e 66 6f 00 00 00 __unwind_info...
000001b0: 5f 5f 54 45 58 54 00 00 00 00 00 00 00 00 00 00 __TEXT..........
000001c0: a8 3f 00 00 01 00 00 00 58 00 00 00 00 00 00 00 .?......X.......
000001d0: a8 3f 00 00 02 00 00 00 00 00 00 00 00 00 00 00 .?..............
000001e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
# struct section_64
# .sectname: "__unwind_info",
# .segname: "__TEXT",
# .addr: 0x0000000100003fa8,
# .size: 0x0000000000000058,
# .offset: 0x00003fa8,
# .align: 0x00000002,
# .reloff: 0x00000000,
# .nreloc: 0x00000000,
# .flags: 0x00000000,

000001f0: 19 00 00 00 98 00 00 00 5f 5f 44 41 54 41 5f 43 ........__DATA_C
00000200: 4f 4e 53 54 00 00 00 00 00 40 00 00 01 00 00 00 ONST.....@......
00000210: 00 40 00 00 00 00 00 00 00 40 00 00 00 00 00 00 .@.......@......
00000220: 00 40 00 00 00 00 00 00 03 00 00 00 03 00 00 00 .@..............
00000230: 01 00 00 00 10 00 00 00 ........
# struct segment_command_64
# .cmd: LC_SEGMENT_64 (0x19),
# .cmdsize: 0x00000098,
# .segname: "__DATA_CONST",
# .vmaddr: 0x0000000100004000,
# .vmsize: 0x0000000000004000,
# .fileoff: 0x0000000000004000,
# .filesize: 0x0000000000004000,
# .maxprot: 0x00000003,
# .initprot: 0x00000003,
# .nsects: 0x00000001,
# .flags: 0x00000010,

00000238: 5f 5f 67 6f 74 00 00 00 __got...
00000240: 00 00 00 00 00 00 00 00 5f 5f 44 41 54 41 5f 43 ........__DATA_C
00000250: 4f 4e 53 54 00 00 00 00 00 40 00 00 01 00 00 00 ONST.....@......
00000260: 08 00 00 00 00 00 00 00 00 40 00 00 03 00 00 00 .........@......
00000270: 00 00 00 00 00 00 00 00 06 00 00 00 01 00 00 00 ................
00000280: 00 00 00 00 00 00 00 00 ........
# struct section_64
# .sectname: "__got",
# .segname: "__DATA_CONST",
# .addr: 0x0000000100004000,
# .size: 0x0000000000000008,
# .offset: 0x00004000,
# .align: 0x00000003,
# .reloff: 0x00000000,
# .nreloc: 0x00000000,
# .flags: 0x00000006,

00000288: 19 00 00 00 48 00 00 00 ....H...
00000290: 5f 5f 4c 49 4e 4b 45 44 49 54 00 00 00 00 00 00 __LINKEDIT......
000002a0: 00 80 00 00 01 00 00 00 00 40 00 00 00 00 00 00 .........@......
000002b0: 00 80 00 00 00 00 00 00 58 00 00 00 00 00 00 00 ........X.......
000002c0: 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................
# struct segment_command_64
# .cmd: LC_SEGMENT_64 (0x19),
# .cmdsize: 0x00000048,
# .segname: "__LINKEDIT",
# .vmaddr: 0x0000000100008000,
# .vmsize: 0x0000000000004000,
# .fileoff: 0x0000000000008000,
# .filesize: 0x0000000000000058,
# .maxprot: 0x00000001,
# .initprot: 0x00000001,
# .nsects: 0x00000000,
# .flags: 0x00000000,

000002d0: 34 00 00 80 10 00 00 00 00 80 00 00 58 00 00 00 4...........X...
# struct linkedit_data_command
# .cmd: LC_DYLD_CHAINED_FIXUPS (0x80000034),
# .cmdsize: 0x00000010,
# .dataoff: 0x00008000,
# .datasize: 0x00000058,

000002e0: 0e 00 00 00 20 00 00 00 0c 00 00 00 .... .......
# struct dylinker_command
# .cmd: LC_LOAD_DYLINKER (0xe),
# .cmdsize: 0x00000020,
# .name: 0x0000000c,
000002ec: 2f 75 73 72 /usr
000002f0: 2f 6c 69 62 2f 64 79 6c 64 00 00 00 00 00 00 00 /lib/dyld.......

00000300: 28 00 00 80 18 00 00 00 6c 3f 00 00 00 00 00 00 (.......l?......
00000310: 00 00 00 00 00 00 00 00 ........
# struct entry_point_command
# .cmd: LC_MAIN (0x80000028),
# .cmdsize: 0x00000018,
# .entryoff: 0x0000000000003f6c,
# .stacksize: 0x0000000000000000,

00000318: 0c 00 00 00 38 00 00 00 ....8...
00000320: 18 00 00 00 02 00 00 00 00 00 47 05 00 00 01 00 ..........G.....
# struct dylib_command
# .cmd: LC_LOAD_DYLIB (0xc),
# .cmdsize: 0x00000038,
# .name: 0x00000018,
# .timestamp: 0x00000002,
# .current_version: 0x05470000,
# .compatibility_version: 0x00010000,
00000330: 2f 75 73 72 2f 6c 69 62 2f 6c 69 62 53 79 73 74 /usr/lib/libSyst
00000340: 65 6d 2e 42 2e 64 79 6c 69 62 00 00 00 00 00 00 em.B.dylib......

00003f6c: fd 7b bf a9 .{..
00003f70: fd 03 00 91 00 00 00 90 00 60 3e 91 04 00 00 94 .........`>.....
00003f80: 00 00 80 52 fd 7b c1 a8 c0 03 5f d6 ...R.{...._.
# __text

00003f8c: 10 00 00 b0 ....
00003f90: 10 02 40 f9 00 02 1f d6 ..@.....
# __stubs

00003f98: 48 65 6c 6c 6f 2c 20 77 Hello, w
00003fa0: 6f 72 6c 64 21 00 orld!.
# __cstring

00003fa8: 01 00 00 00 00 00 00 00 ........
00003fb0: 00 00 00 00 00 00 00 00 00 00 00 00 1c 00 00 00 ................
00003fc0: 02 00 00 00 6c 3f 00 00 40 00 00 00 40 00 00 00 ....l?..@...@...
00003fd0: 8c 3f 00 00 00 00 00 00 40 00 00 00 00 00 00 00 .?......@.......
00003fe0: 00 00 00 00 00 00 00 00 03 00 00 00 0c 00 01 00 ................
00003ff0: 10 00 01 00 00 00 00 00 00 00 00 04 00 00 00 00 ................
# __unwind_info

00004000: 00 00 00 00 00 00 00 80 ........
# __got

00008000: 00 00 00 00 1c 00 00 00 48 00 00 00 4c 00 00 00 ........H...L...
00008010: 01 00 00 00 01 00 00 00 00 00 00 00 04 00 00 00 ................
00008020: 00 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 ................
00008030: 18 00 00 00 00 40 06 00 00 40 00 00 00 00 00 00 .....@...@......
00008040: 00 00 00 00 01 00 00 00 01 02 00 00 00 5f 70 75 ............._pu
00008050: 74 73 00 00 00 00 00 00 ts......
# dyld_chained_fixups

リンク