| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968 |
- // Copyright (C) 2016 Y_Less
- //
- // Permission is hereby granted, free of charge, to any person obtaining a
- // copy of this software and associated documentation files (the "Software"),
- // to deal in the Software without restriction, including without limitation
- // the rights to use, copy, modify, merge, publish, distribute, sublicense,
- // and/or sell copies of the Software, and to permit persons to whom the
- // Software is furnished to do so, subject to the following conditions:
- //
- // The above copyright notice and this permission notice shall be included in
- // all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- // DEALINGS IN THE SOFTWARE.
- // Include guard: if this file has already been included, stop reading it
- // here; otherwise mark it as included.
- #if defined CODESCAN_INC
- #endinput
- #endif
- #define CODESCAN_INC
- /*
- // Example:
- forward TailCall_FoundCallback(m[CodeScanner])
- main() {
- new scanner[CodeScanner];
- CodeScanInit(scanner);
-
- new csm0[CodeScanMatcher];
- CodeScanMatcherInit(csm0, &TailCall_FoundCallback);
- CodeScanMatcherPattern(csm0,
- OP(PUSH_C, ???)
- OP(CALL, &MyFunc)
- OP(RETN)
- );
- CodeScanAddMatcher(scanner, csm0);
-
- // Add other matcher patterns here.
-
- // Run all the scanners in parallel.
- CodeScanRun(scanner);
- }
- public TailCall_FoundCallback(m[CodeScanner]) {
- // Do something with the found address (of the START of the match), and the
- // stack size (of the END of the match) - different for reasons...
- }
- // Create a default call for this function, so that we can include it in the AMX
- // and take the address in "OP". Note that you do NOT need to do this for
- // scanner callbacks if you only use their address in "CodeScanMatcherInit".
- #define CALL@MyFunc MyFunc(0, "")
- stock MyFunc(a, b[], ...) {
- // Normal function.
- }
- */
- #include <core>
- #include "frame_info"
- #include "disasm"
- #include "asm"
- #include "addressof"
- // Per-matcher option bits, stored in `CodeScanMatcher_flags`. Each value is
- // a distinct bit so they can be combined with `|`.
- //
- // `CodeScanCheck` tests the four `SCANNER_IGNORE_*` bits to decide whether
- // the corresponding opcode (NOP / BREAK / HALT / BOUNDS) is ignored for that
- // matcher. `SCANNER_HAS_USER_DATA` is set by `CodeScanMatcherData` and
- // tested by `CodeScanCall` to decide whether the callback gets a second
- // parameter.
- //
- // NOTE(review): `SCANNER_FAIL_ON_INVALID` and `SCANNER_NAME_FUNCTIONS` are
- // not referenced in the visible part of this file.
- #define SCANNER_FAIL_ON_INVALID (1)
- #define SCANNER_IGNORE_NOP (2)
- #define SCANNER_IGNORE_BREAK (4)
- #define SCANNER_IGNORE_HALT (16)
- #define SCANNER_IGNORE_BOUNDS (32)
- #define SCANNER_HAS_USER_DATA (64)
- // Kinds of entry in a compiled pattern. A pattern is stored as consecutive
- // (kind, value) cell pairs in `CodeScanMatcher_code`. The short `O@?_`
- // names are the ones emitted by the `OP()` macro; the `OP_TYPE_*_` names are
- // readable aliases for the same values used by the functions in this file.
- #define O@I_ (0) // Type integer.
- #define O@U_ (1) // Type unknown (???).
- #define O@F_ (2) // Type function (&func).
- #define O@O_ (4) // Type opcode.
- #define O@S_ (5) // Type skipped.
- #define OP_TYPE_INTEGER_ (O@I_) // Type integer.
- #define OP_TYPE_UNKNOWN_ (O@U_) // Type unknown (???).
- #define OP_TYPE_FUNCTION_ (O@F_) // Type function (&func).
- #define OP_TYPE_OPCODE_ (O@O_) // Type opcode.
- #define OP_TYPE_SKIP_ (O@S_) // Type skipped.
- // If we can determine a function's name, we can determine if it is a public or
- // not. If we can't name it, it is a normal one. However, if naming is skipped
- // then we will have no idea what type it is.
- // Values of `CodeScanMatch_type`. The numbers are not arbitrary:
- // `CodeScanGetMatchType` resolves the three "not yet named" values (>= 5)
- // with arithmetic - it subtracts 4 when a public name is found
- // (5 -> 1, 7 -> 3, 8 -> 4) and integer-divides by 2 otherwise
- // (5 -> 2, 7 -> 3, 8 -> 4). Changing any value here breaks that mapping.
- #define SCANNER_FUNC_PUBLIC (1)
- #define SCANNER_FUNC_OTHER (2)
- #define SCANNER_FUNC_AUTOMATA (3)
- #define SCANNER_FUNC_HALT (4)
- #define SCANNER_FUNC_UNKNOWN (5)
- #define SCANNER_FUNC_AUTOMATA_NO_NAME (7)
- #define SCANNER_FUNC_HALT_NO_NAME (8)
- // The "OP()" macro is used to easily define code patterns to scan for:
- //
- // new csm[CodeScanMatcher];
- // CodeScanMatcherInit(csm, &callback);
- // CodeScanMatcherPattern(csm,
- // OP(CONST_PRI, 42)
- // OP(ADD_C, ???)
- // OP(CALL, &my_func)
- // )
- //
- // Any function that you want to take the address of in this way must have its
- // call pattern defined as:
- //
- // #define CALL@my_func my_func(0, "hi", false)
- //
- // Because otherwise a) the code can't guarantee that the function will be in
- // the final amx, and b) we need a call to it from which to extract the addr.
- //
- // You can use this style explicitly within an "OP" scanner, or there is a new
- // dedicated keyword for it - "addressof(func)" (note the lack of "&" there).
- //
- // Implementation of "OP()". Each use expands to a LEADING comma followed by
- // pairs of extra arguments "(kind),(value)" - which is why the
- // "CodeScanMatcherPattern" macro has no trailing comma. The "O@?_:" prefixes
- // are not real tags; they are hooks that the later macros below match on
- // and consume.
- //
- // "OP(...)" starts a pair whose kind defaults to "O@O_" (opcode) and whose
- // value is "OP_" glued to the first token.
- #define OP(%0) ,(_:O@T_:O@O_),(Opcode:O@X_:O@Y_:O@W_:$OP_%0)
- // Removes a space ("\32;") from inside the "OP_..." text.
- #define OP_%0\32;%1) OP_%0%1)
- // Two or more commas remain: close the current value and start a new pair
- // for the next parameter (%1), carrying the rest (%2) along.
- #define O@X_:%9$%0,%1,%2) %0),(_:O@1_:O@2_:O@3_:$%1|||,%2)
- // Exactly one comma remains: as above, but %1 is the last parameter.
- #define O@Y_:%9$%0,%1) %0),(_:O@1_:O@2_:O@3_:$%1|||)
- // No commas remain: just close the current value.
- #define O@Z_:%9$%0) %0)
- #define O@W_:%9$%0) %0)
- // "OP(???)": the whole instruction is a wildcard - emit a skip entry with
- // value 0 instead of an opcode entry.
- #define O@T_:O@O_),(Opcode:O@X_:O@Y_:O@W_:$OP_???%0) O@S_),(0)
- // Parameter containing "???": an unknown ("hole") entry, value 0.
- #define O@1_:%9$%0???%1|||%2) O@U_ ),(_:O@X_:O@Y_:O@Z_:$0%2)
- // Parameter containing "&": a function entry. The value expression uses
- // "O@A_()", "CALL@<name>" and "O@V_", which are defined outside this file
- // (presumably by the "addressof" include - TODO confirm).
- #define O@2_:%9$%0&%1|||%2) O@F_),(O@A_()?(((CALL@%1),O@V_)?1:2):_:O@X_:O@Y_:O@Z_:$(O@V_)%2)
- // Anything else: a plain integer entry whose value is the parameter.
- #define O@3_:%9$%1|||%2) O@I_ ),(_:O@X_:O@Y_:O@Z_:$(%1)%2)
- // Size of one cell in bytes, if nothing else has defined it already.
- #if !defined cellbytes
- #define cellbytes (cellbits / 8)
- #endif
- // User-overridable limits: define any of these before including this file
- // to change them.
- //
- // CODE_SCAN_MAX_PATTERN sizes the pattern storage and the hole storage via
- // the two derived values below.
- #if !defined CODE_SCAN_MAX_PATTERN
- #define CODE_SCAN_MAX_PATTERN (16)
- #endif
- // Number of cells in "CodeScanMatcher_code". Every pattern entry takes two
- // cells (kind, value).
- #define CODE_SCAN_MAX_PATTERN_ARRAY (CODE_SCAN_MAX_PATTERN * 4)
- // Number of "???" results stored per match.
- #define CODE_SCAN_MAX_HOLES (CODE_SCAN_MAX_PATTERN / 2)
- // Number of partial matches of one pattern tracked at the same time.
- #if !defined CODE_SCAN_MAX_PARALLEL
- #define CODE_SCAN_MAX_PARALLEL (2)
- #endif
- // Number of pending forward jump / switch targets tracked per function.
- #if !defined CODE_SCAN_MAX_JUMP_TARGETS
- #define CODE_SCAN_MAX_JUMP_TARGETS (32)
- #endif
- // All the information for scanning through an AMX and extracting lots of nice
- // information about it.
- // The "CodeScanMatch_*" fields describe the current position / latest match
- // and are what callbacks read through the "CodeScanGetMatch*" accessors.
- // The "CodeScanner_*" fields are internal bookkeeping.
- enum CodeScanner {
- CodeScanMatch_func, // Start of the containing function.
- CodeScanMatch_size, // Size of the match.
- CodeScanMatch_type, // Public, normal, automata, etc.
- CodeScanMatch_heap, // At the point of this scanner.
- CodeScanMatch_stack, // At the point of this scanner.
- CodeScanMatch_params, // Likely unknown statically.
- CodeScanMatch_cip, // The point of the pattern match.
- CodeScanMatch_holes[CODE_SCAN_MAX_HOLES], // Results of "???"s.
- // Filled lazily by "CodeScanGetMatchType" (via "CodeScanGetFuncName").
- CodeScanMatch_name[32 char],
- // Head of the linked list of matchers (-1 when the list is empty).
- CodeScanner_first,
- // Lowest jump-target slot index in use since the last reset.
- CodeScanner_minn,
- CodeScanner_jump_switch[CODE_SCAN_MAX_JUMP_TARGETS], // For "CASETBL" not regular jumps.
- CodeScanner_jump_target[CODE_SCAN_MAX_JUMP_TARGETS], // Zero when this slot is available.
- CodeScanner_jump_stack [CODE_SCAN_MAX_JUMP_TARGETS], // Sizes at the time of the jump.
- CodeScanner_jump_heap [CODE_SCAN_MAX_JUMP_TARGETS], // Sizes at the time of the jump.
- // Parse state and its parameter, kept here so that "CodeScanStep" can be
- // called repeatedly from outside "CodeScanRun".
- CodeScanner_state,
- CodeScanner_param
- }
- // One pattern to look for, plus the progress of every partial match of it.
- // Arrays sized by CODE_SCAN_MAX_PARALLEL hold one entry per partial match.
- enum CodeScanMatcher {
- CodeScanMatcher_func, // A pointer to the callback.
- CodeScanMatcher_user_data, // User data to pass to their callback.
- CodeScanMatcher_code[CODE_SCAN_MAX_PATTERN_ARRAY], // The code to look for.
- // Number of cells used in "CodeScanMatcher_code"; zero disables the
- // matcher (see "CodeScanCheck").
- CodeScanMatcher_len,
- CodeScanMatcher_offset[CODE_SCAN_MAX_PARALLEL], // Where the current scanner is in this code.
- // Instruction pointer at which each partial match began.
- CodeScanMatcher_start[CODE_SCAN_MAX_PARALLEL],
- // How many "???" values each partial match has captured so far.
- CodeScanMatcher_holeidx[CODE_SCAN_MAX_PARALLEL],
- // Captured values; partial match "idx" owns the CODE_SCAN_MAX_HOLES
- // cells starting at "idx * CODE_SCAN_MAX_HOLES".
- CodeScanMatcher_holes[CODE_SCAN_MAX_PARALLEL * CODE_SCAN_MAX_HOLES],
- CodeScanMatcher_next, // The next match array.
- CodeScanMatcher_flags // Customisation.
- }
- // This macro is to let anyone use `&callback` for a scanner callback without
- // having to define the `CALL@...` macro for the required parameters (since we
- // know to call scanner callbacks in this code).
- //
- // The expression contains a call `%1(gCodeScanCallback_match)`, so it only
- // compiles when the callback exists and accepts a `CodeScanner` array.
- // `O@A_` and `O@V_` are defined outside this file (presumably by the
- // "addressof" include, which decides whether that call ever runs and
- // supplies the resulting address - TODO confirm).
- #define addressof_ScannerCallback_(%1) (O@A_()?(((%1((gCodeScanCallback_match))),O@V_)?1:2):(O@V_))
- // Dummy scanner state used only as the argument of the fake callback call
- // inside `addressof_ScannerCallback_`. Not `static` because that macro
- // expands in user code.
- stock
- gCodeScanCallback_match[CodeScanner];
- // File-local cache, filled in by `CodeScanInit`:
- //   gHdr  - the AMX header, as returned by `GetAmxHeader`.
- //   gBase - `GetAmxBaseAddress()` plus the header's DAT offset.
- //   gDat  - the header's COD offset minus its DAT offset; subtracting it
- //           converts a DAT-relative address to a COD-relative one.
- static stock
- gHdr[AMX_HDR],
- gBase,
- gDat;
- // Tests whether the current instruction is the destination of a jump or
- // switch recorded earlier in this function. On a hit, "stk" and "hea" are
- // overwritten with the sizes saved when the jump was recorded and "true" is
- // returned. Only slots from "num - 1" down to the stored "minn" are checked.
- //
- //   cip   - current address, compared against plain jump targets.
- //   deloc - current address in the form stored inside case tables.
- static stock bool:CodeScanCheckJumpTarget(cip, deloc, &stk, &hea, jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
-     new
-         lowest = jumpTargets[CodeScanner_minn],
-         tablePtr,
-         remaining;
-     for (new slot = num; slot-- > lowest; ) {
-         if (!jumpTargets[CodeScanner_jump_target][slot]) {
-             // Unused slot.
-             continue;
-         }
-         tablePtr = jumpTargets[CodeScanner_jump_switch][slot];
-         if (tablePtr) {
-             // A switch: the slot points at the case table's record count.
-             // There is one more address than that (the default), and the
-             // addresses are two cells apart.
-             remaining = ReadAmxMemory(tablePtr) + 1;
-             tablePtr += cellbytes;
-             for ( ; remaining != 0; --remaining) {
-                 if (ReadAmxMemory(tablePtr) == deloc) {
-                     // For switches the "target" cell is a countdown of the
-                     // branches still to be reached.
-                     --jumpTargets[CodeScanner_jump_target][slot];
-                     stk = jumpTargets[CodeScanner_jump_stack][slot];
-                     hea = jumpTargets[CodeScanner_jump_heap][slot];
-                     return true;
-                 }
-                 tablePtr += 2 * cellbytes;
-             }
-         } else if (jumpTargets[CodeScanner_jump_target][slot] == cip) {
-             // A plain jump lands here - release the slot.
-             jumpTargets[CodeScanner_jump_target][slot] = 0;
-             stk = jumpTargets[CodeScanner_jump_stack][slot];
-             hea = jumpTargets[CodeScanner_jump_heap][slot];
-             return true;
-         }
-     }
-     return false;
- }
- // Frees every jump-target slot and resets "minn" to "num", so that nothing
- // is checked until a new target is added.
- static stock CodeScanResetJumpTargets(jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
-     jumpTargets[CodeScanner_minn] = num;
-     for (new slot = num; slot--; ) {
-         jumpTargets[CodeScanner_jump_target][slot] = 0;
-     }
- }
- // Records a jump to "cip" together with the stack and heap sizes at the
- // jump. Slots are searched from the top down; the search stops at the first
- // slot that either already holds "cip" or is free. Nothing is recorded when
- // no such slot exists.
- static stock CodeScanAddJumpTarget(cip, stk, hea, jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
-     for (new slot = num; slot--; ) {
-         new existing = jumpTargets[CodeScanner_jump_target][slot];
-         if (existing == cip) {
-             // Another jump to this address is already recorded.
-             return;
-         }
-         if (existing) {
-             // Slot taken by a different target.
-             continue;
-         }
-         jumpTargets[CodeScanner_jump_switch][slot] = 0;
-         jumpTargets[CodeScanner_jump_target][slot] = cip;
-         jumpTargets[CodeScanner_jump_stack][slot] = stk;
-         jumpTargets[CodeScanner_jump_heap][slot] = hea;
-         jumpTargets[CodeScanner_minn] = min(jumpTargets[CodeScanner_minn], slot);
-         return;
-     }
- }
- // Records the case table of the "SWITCH" currently held in "dctx", with the
- // stack and heap sizes at that point. The first free slot (searching from
- // the top down) stores the address of the table's count cell and a
- // countdown of "count + 1" branches.
- static stock CodeScanAddSwitchTarget(dctx[DisasmContext], stk, hea, jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
-     new
-         sip = DisasmGetOperand(dctx) - gBase,
-         codepos = sip + gHdr[AMX_HDR_DAT] - gHdr[AMX_HDR_COD];
-     // Can happen when we parse "RelocateOpcodeNow" because it has an
-     // explicit "#emit switch 0" in: the operand is then not a real table.
-     if (codepos < 0 || codepos > gHdr[AMX_HDR_DAT]) {
-         return;
-     }
-     if (UnrelocateOpcode(Opcode:ReadAmxMemory(sip)) != OP_CASETBL) {
-         return;
-     }
-     for (new slot = num; slot--; ) {
-         if (jumpTargets[CodeScanner_jump_target][slot]) {
-             // Slot in use.
-             continue;
-         }
-         jumpTargets[CodeScanner_jump_switch][slot] = sip + cellbytes;
-         jumpTargets[CodeScanner_jump_target][slot] = ReadAmxMemory(sip + cellbytes) + 1;
-         jumpTargets[CodeScanner_jump_stack][slot] = stk;
-         jumpTargets[CodeScanner_jump_heap][slot] = hea;
-         jumpTargets[CodeScanner_minn] = min(jumpTargets[CodeScanner_minn], slot);
-         return;
-     }
- }
- // Discards all partial-match progress of one matcher and writes the address
- // of the following matcher to "next", which lets callers walk the list
- // while resetting it. A matcher without a callback also loses its pattern.
- static stock CodeScanReset(cs[CodeScanMatcher], &next) {
-     static
-         lZeroes[CODE_SCAN_MAX_PARALLEL];
-     next = cs[CodeScanMatcher_next];
-     cs[CodeScanMatcher_offset] = lZeroes;
-     cs[CodeScanMatcher_holeidx] = lZeroes;
-     if (cs[CodeScanMatcher_func] == 0) {
-         cs[CodeScanMatcher_len] = 0;
-     }
- }
- // Pushes "searcher" onto the front of the scanner's matcher list. The list
- // stores "ref(searcher)", not a copy of the array.
- stock CodeScanAddMatcher(scanner[CodeScanner], searcher[CodeScanMatcher]) {
-     // Link to the old head first, then become the new head.
-     searcher[CodeScanMatcher_next] = scanner[CodeScanner_first];
-     scanner[CodeScanner_first] = ref(searcher);
- }
- // Prepares a matcher: stores the callback address and option flags, clears
- // the pattern, user data and list link, and resets all match progress.
- // Normally reached through the "CodeScanMatcherInit(matcher, &callback)"
- // macro, which produces "address" without needing a "CALL@..." definition
- // for the callback.
- //
- //   address - address of the callback.
- //   flags   - "SCANNER_*" bits; "SCANNER_HAS_USER_DATA" is always removed
- //             here and can only be set by "CodeScanMatcherData".
- stock CodeScanMatcherInit_(searcher[CodeScanMatcher], address, flags = SCANNER_IGNORE_NOP | SCANNER_IGNORE_BOUNDS | SCANNER_IGNORE_BREAK | SCANNER_IGNORE_HALT) {
-     searcher[CodeScanMatcher_func] = address;
-     searcher[CodeScanMatcher_user_data] = 0;
-     searcher[CodeScanMatcher_len] = 0;
-     searcher[CodeScanMatcher_next] = -1;
-     searcher[CodeScanMatcher_flags] = flags & ~SCANNER_HAS_USER_DATA;
-     // "flags" is no longer needed, so it doubles as the throw-away
-     // destination for the "next" output of the reset.
-     CodeScanReset(searcher, flags);
- }
- // Attaches one cell of user data to the matcher; the callback then receives
- // it as a second parameter. Use "ref()" to pass an array.
- stock CodeScanMatcherData(searcher[CodeScanMatcher], val) {
-     searcher[CodeScanMatcher_user_data] = val;
-     searcher[CodeScanMatcher_flags] |= SCANNER_HAS_USER_DATA;
- }
- // Public front-end: "CodeScanMatcherInit(matcher, &callback[, flags])".
- // The callback is referenced through "addressof_ScannerCallback_", so this
- // will not compile if the function doesn't exist. (Older wording described
- // passing the name in as a string; "CodeScanMatcherInit_" now receives an
- // address instead.)
- #define CodeScanMatcherInit(%0,&%1) CodeScanMatcherInit_((%0),addressof_ScannerCallback_(%1))
- // When extra arguments were given after the callback they end up inside the
- // "addressof_ScannerCallback_(...)" text; this second pattern moves them
- // (%2) back out to become the trailing parameter of "CodeScanMatcherInit_".
- #define CodeScanMatcherInit_(%0,addressof_ScannerCallback_(%1,%2)) CodeScanMatcherInit_(%0,addressof_ScannerCallback_(%1),%2)
- // Copies a pattern built from "OP(...)" entries into the matcher and
- // validates it. The variable arguments arrive as (kind, value) pairs.
- //
- // Returns:
- //    0 - success; the pattern length is stored in the matcher.
- //   -1 - more cells than CODE_SCAN_MAX_PATTERN_ARRAY.
- //   -2 - an odd number of cells (not made of pairs).
- //   >0 - position-derived number of the pair at which a bad kind, an
- //        unexpected parameter or a missing parameter was found.
- //
- // The stored length is left untouched on failure.
- stock CodeScanMatcherPattern_(searcher[CodeScanMatcher], {Opcode, Float, _}:...) {
-     new
-         total = numargs() - 1;
-     if (total > CODE_SCAN_MAX_PATTERN_ARRAY) {
-         return -1;
-     }
-     if (total & 0x01) {
-         // Not a multiple of 2 in the scanner.
-         return -2;
-     }
-     new
-         pos = 0;
-     while (pos != total) {
-         // Variable argument 0 is "searcher", hence the "+ 1" and "+ 2".
-         new
-             kind = getarg(pos + 1),
-             Opcode:code = Opcode:getarg(pos + 2);
-         searcher[CodeScanMatcher_code][pos + 0] = kind;
-         searcher[CodeScanMatcher_code][pos + 1] = _:code;
-         pos += 2;
-         if (kind == OP_TYPE_SKIP_) {
-             // A wildcard instruction takes no parameters.
-             continue;
-         }
-         if (kind != OP_TYPE_OPCODE_) {
-             // Incorrect entry kind. Return the op where it happened.
-             return pos / 2 + 1;
-         }
-         // Consume the parameter entries that follow this opcode. The
-         // variable OPs like `CASETBL` have negative parameter counts, so
-         // they never hit either error below.
-         new
-             remaining = GetOpcodeInstructionParameters(code);
-         while (pos != total) {
-             new
-                 argKind = getarg(pos + 1);
-             if (argKind != OP_TYPE_INTEGER_ && argKind != OP_TYPE_UNKNOWN_ && argKind != OP_TYPE_FUNCTION_) {
-                 // Start of the next instruction (or a bad kind, which the
-                 // outer loop reports).
-                 break;
-             }
-             if (remaining == 0) {
-                 // Got an unexpected parameter.
-                 return pos / 2 + 1;
-             }
-             --remaining;
-             searcher[CodeScanMatcher_code][pos + 0] = argKind;
-             searcher[CodeScanMatcher_code][pos + 1] = getarg(pos + 2);
-             pos += 2;
-         }
-         if (remaining > 0) {
-             // Missing a required (non-optional) parameter.
-             return pos / 2 + 1;
-         }
-     }
-     searcher[CodeScanMatcher_len] = total;
-     // No error.
-     return 0;
- }
- // Note the lack of trailing comma. This is to make the code patterns work:
- // every "OP(...)" expands to text that begins with its own comma, so the
- // macro must consume the one written after the matcher argument.
- #define CodeScanMatcherPattern(%0, CodeScanMatcherPattern_(%0
- // Turns a stored matcher address ("v", as produced by "ref()" in
- // "CodeScanAddMatcher") back into something usable as a "CodeScanMatcher"
- // array argument.
- //
- // The assembly copies the first argument over the "secret argument" slot
- // and returns straight away, so the "return lFakeMatcher" line below is
- // never executed. NOTE(review): the secret argument is presumably the
- // hidden array-return destination the compiler adds for functions that
- // return arrays - confirm against the compiler's calling convention.
- static stock CodeScanDeref(v) {
- static
- lFakeMatcher[CodeScanMatcher];
- #pragma unused v
- #emit load.s.pri 12 // First argument.
- #emit stor.s.pri 16 // Secret argument.
- #emit retn
- return lFakeMatcher; // Make compiler happy, and teach it the array return.
- }
- // Feeds one disassembled instruction to one matcher and advances each of
- // its partial matches.
- //
- //   op   - opcode of the current instruction.
- //   dctx - disassembler context positioned on that instruction.
- //   cs   - the matcher to advance.
- //   fctx - scanner state; on a complete match it receives the captured
- //          "???" values, the match start address and the match size.
- //   next - set to the following matcher in the list whenever "false" is
- //          returned. It is NOT changed when "true" is returned, so the
- //          caller still knows which matcher fired.
- //
- // Returns "true" when the instruction completes the pattern.
- static stock bool:CodeScanCheck(Opcode:op, dctx[DisasmContext], cs[CodeScanMatcher], fctx[CodeScanner], &next) {
-     // A matcher with no pattern can never match.
-     if (!cs[CodeScanMatcher_len]) {
-         return
-             next = cs[CodeScanMatcher_next],
-             false;
-     }
-     new
-         bool:zero = true,
-         off = cs[CodeScanMatcher_flags];
-     if (off) {
-         // To deal with differences in different compilation modes, we just mark
-         // these opcodes as fully ignorable (because they are mostly used for
-         // debugging and not real user code). An ignored instruction neither
-         // advances nor fails any partial match.
-         switch (op) {
-             case OP_NOP: {
-                 if (off & SCANNER_IGNORE_NOP) {
-                     return
-                         next = cs[CodeScanMatcher_next],
-                         false;
-                 }
-             }
-             case OP_BOUNDS: {
-                 if (off & SCANNER_IGNORE_BOUNDS) {
-                     return
-                         next = cs[CodeScanMatcher_next],
-                         false;
-                 }
-             }
-             case OP_BREAK: {
-                 if (off & SCANNER_IGNORE_BREAK) {
-                     return
-                         next = cs[CodeScanMatcher_next],
-                         false;
-                 }
-             }
-             case OP_HALT: {
-                 if (off & SCANNER_IGNORE_HALT) {
-                     return
-                         next = cs[CodeScanMatcher_next],
-                         false;
-                 }
-             }
-         }
-     }
-     new
-         cnt = DisasmGetNumOperands(dctx),
-         len = cs[CodeScanMatcher_len];
-     for (new idx = 0; idx != CODE_SCAN_MAX_PARALLEL; ++idx) {
-         off = cs[CodeScanMatcher_offset][idx];
-         // Ensure that only one of the parallel scanners starts from the
-         // beginning on each instruction.
-         if (off) {
-         } else if (zero) {
-             // Get the start point of this match.
-             cs[CodeScanMatcher_start][idx] = DisasmGetCurIp(dctx),
-             zero = false;
-         } else {
-             continue;
-         }
-         if (cs[CodeScanMatcher_code][off] == OP_TYPE_SKIP_) {
-             off += 2;
-             if (off == len) {
-                 // The pattern ends with a skip, so it is complete. Copy
-                 // THIS partial match's captures (they start at
-                 // "idx * CODE_SCAN_MAX_HOLES" in the source) to the start
-                 // of the destination. The third "memcpy" parameter is a
-                 // byte offset into the DESTINATION, so it must be 0 here,
-                 // exactly as in the other completion path below.
-                 return
-                     memcpy(fctx[CodeScanMatch_holes], cs[CodeScanMatcher_holes][idx * CODE_SCAN_MAX_HOLES], 0, cs[CodeScanMatcher_holeidx][idx] * cellbytes, CODE_SCAN_MAX_HOLES),
-                     fctx[CodeScanMatch_cip] = cs[CodeScanMatcher_start][idx],
-                     fctx[CodeScanMatch_size] = DisasmGetNextIp(dctx) - cs[CodeScanMatcher_start][idx],
-                     true;
-             } else if (cs[CodeScanMatcher_code][off] == OP_TYPE_OPCODE_ && Opcode:cs[CodeScanMatcher_code][off + 1] == op) {
-                 // Found the match after the current "missing" instruction.
-                 goto CodeScanCheck_pass;
-             } else {
-                 // The "== op" check is done twice because in this case we don't
-                 // want to fail the scanner if it doesn't match. The stored
-                 // offset is unchanged, so the skip is retried on the next
-                 // instruction.
-                 continue;
-             }
-         }
-         if (cs[CodeScanMatcher_code][off] == OP_TYPE_OPCODE_ && Opcode:cs[CodeScanMatcher_code][off + 1] == op) {
- CodeScanCheck_pass:
-             // Step over the opcode entry, then compare one pattern entry
-             // with each operand of the instruction.
-             off += 2;
-             for (new i = 0; i != cnt; ++i) {
-                 switch (cs[CodeScanMatcher_code][off++]) {
-                     // Because we now abstract relocations to the disasm system,
-                     // we don't need to differentiate between fixed parameters
-                     // and function parameters any more - they are always fully
-                     // resolved.
-                     case OP_TYPE_INTEGER_, OP_TYPE_FUNCTION_: {
-                         if (cs[CodeScanMatcher_code][off++] != DisasmGetOperandReloc(dctx, i)) {
-                             goto CodeScanCheck_fail;
-                         }
-                     }
-                     case OP_TYPE_UNKNOWN_: {
-                         // Save the parameter.
-                         ++off,
-                         cs[CodeScanMatcher_holes][idx * CODE_SCAN_MAX_HOLES + cs[CodeScanMatcher_holeidx][idx]++] = DisasmGetOperandReloc(dctx, i);
-                     }
-                     case OP_TYPE_OPCODE_, OP_TYPE_SKIP_: {
-                         // The pattern has fewer parameters than the code.
-                         goto CodeScanCheck_fail;
-                     }
-                 }
-             }
-             if (off == len) {
-                 // Get the address of the START of the match.
-                 return
-                     memcpy(fctx[CodeScanMatch_holes], cs[CodeScanMatcher_holes][idx * CODE_SCAN_MAX_HOLES], 0, cs[CodeScanMatcher_holeidx][idx] * cellbytes, CODE_SCAN_MAX_HOLES),
-                     fctx[CodeScanMatch_cip] = cs[CodeScanMatcher_start][idx],
-                     fctx[CodeScanMatch_size] = DisasmGetNextIp(dctx) - cs[CodeScanMatcher_start][idx],
-                     true;
-             } else switch (cs[CodeScanMatcher_code][off]) {
-                 case OP_TYPE_INTEGER_, OP_TYPE_FUNCTION_, OP_TYPE_UNKNOWN_: {
-                     // Parameters remaining, none expected.
-                     goto CodeScanCheck_fail;
-                 }
-                 default: {
-                     // Out of parameters to check but still looking correct.
-                     cs[CodeScanMatcher_offset][idx] = off;
-                     continue;
-                 }
-             }
-         }
- CodeScanCheck_fail:
-         // The opcode or a parameter is wrong - restart this partial match.
-         cs[CodeScanMatcher_holeidx][idx] = cs[CodeScanMatcher_offset][idx] = 0;
-     }
-     return
-         next = cs[CodeScanMatcher_next],
-         false;
- }
- // Looks "addr" up among the public functions. When found, the name is
- // written to "name" (with a size limit of 32 passed to the lookup) and
- // "true" is returned; otherwise "name" becomes empty and "false" is
- // returned.
- //
- // This code will not return great results for public functions with states.
- static stock bool:CodeScanGetFuncName(addr, name[]) {
-     new
-         index = GetPublicIndexFromAddress(addr);
-     if (index >= 0) {
-         GetPublicNameFromIndex(index, name, 32);
-         return true;
-     }
-     // Name not found.
-     name[0] = 0;
-     return false;
- }
- // Disassembles the next instruction and updates the scanner's view of the
- // current function: where it starts, what kind it is, the tracked stack and
- // heap sizes, and the pending forward jump targets.
- //
- //   dctx       - disassembler context; advanced by one instruction.
- //   csState    - scanner state to update.
- //   parseState - small state machine remembering the previous instruction:
- //                  0 - nothing of interest.
- //                  1 - just seen "LCTRL 5".
- //                  2 - just seen "LCTRL 5" then "ADD.C n".
- //                  3 - just seen "PUSH.C n" (possibly a call's byte count).
- //                  4 - no function start seen yet (initial value).
- //   parseParam - operand remembered along with "parseState".
- //
- // A tracked size of "cellmin" means "unknown"; once unknown, it stays
- // unknown until the next function start or matching jump target.
- //
- // Returns "false" only when the disassembler reports "DISASM_DONE".
- static stock bool:CodeScanStepInternal(dctx[DisasmContext], csState[CodeScanner], &parseState, &parseParam) {
- // Loop over the data. Since our end condition is "out of data", we know
- // that any "false" returns are because of invalid data since the "< 0"
- // check is also the only other way that "false" can be returned and we pre-
- // empt that one.
- switch (DisasmNext(dctx)) {
- case DISASM_OK: {
- new
- stk = csState[CodeScanMatch_stack],
- hea = csState[CodeScanMatch_heap],
- cip = DisasmGetCurIp(dctx),
- Opcode:op = DisasmGetOpcode(dctx);
- // The compiler sometimes inserts extra instructions like "NOP" and
- // "BREAK" for debugging and padding (as do we) - maybe ignore them.
- // If an earlier jump lands on this instruction, restore the sizes
- // recorded at that jump BEFORE applying this instruction's own effect.
- CodeScanCheckJumpTarget(cip, cip + gBase, stk, hea, csState);
- switch (op) {
- case OP_HALT: {
- // Only a "HALT" seen before any function start is treated as
- // a function of its own; "parseState" is deliberately left alone.
- if (parseState == 4) {
- csState[CodeScanMatch_type] = SCANNER_FUNC_HALT_NO_NAME,
- csState[CodeScanMatch_func] = cip,
- stk = hea = 0,
- CodeScanResetJumpTargets(csState);
- }
- }
- case OP_PROC: {
- // This is the start of a new function. The only functions
- // that don't start like this are the automata stubs.
- csState[CodeScanMatch_type] = SCANNER_FUNC_UNKNOWN,
- csState[CodeScanMatch_func] = cip,
- CodeScanResetJumpTargets(csState),
- stk = hea = parseState = 0;
- }
- case OP_LOAD_PRI: {
- // If we are not in the main functions yet and this is the
- // first instruction seen, then it is the start of an
- // automata function stub.
- if (parseState == 4) {
- csState[CodeScanMatch_type] = SCANNER_FUNC_AUTOMATA_NO_NAME,
- csState[CodeScanMatch_func] = cip,
- stk = hea = 0,
- CodeScanResetJumpTargets(csState);
- }
- }
- case OP_PUSH_PRI, OP_PUSH_ALT, OP_PUSH_R, OP_PUSH_S, OP_PUSH, OP_PUSH_ADR: {
- if (stk != cellmin) {
- stk += cellbytes;
- }
- parseState = 0;
- }
- case OP_STACK: {
- // The stack grows down, but our count is positive.
- if (stk != cellmin) {
- stk -= DisasmGetOperand(dctx);
- }
- parseState = 0;
- }
- case OP_HEAP: {
- if (hea != cellmin) {
- hea += DisasmGetOperand(dctx);
- }
- parseState = 0;
- }
- case OP_POP_PRI, OP_POP_ALT: {
- if (stk != cellmin) {
- stk -= cellbytes;
- }
- parseState = 0;
- }
- case OP_CALL, OP_CALL_PRI: {
- // Remove all the function parameters. State 3 is only entered
- // while the stack size is known, so no "cellmin" check is needed.
- if (parseState == 3) {
- stk -= parseParam;
- }
- parseState = 0;
- }
- case OP_PUSH_C: {
- // The "+ cellbytes" is because when calling a function, the
- // parameter is the number of bytes pushed, not including
- // this one, with that one implicitly popped on return.
- parseParam = DisasmGetOperand(dctx) + cellbytes;
- if (stk != cellmin) {
- stk += cellbytes,
- parseState = 3;
- }
- }
- // There is a code-get pattern of:
- //
- // LCTRL 5
- // ADD.C n
- // SCTRL 4
- //
- // Which adjusts the stack to the correct size after "goto". We
- // have to deal with that explicitly. Note that the "ADD.C" may
- // be missing if there are no variables currently in scope.
- case OP_LCTRL: {
- if (DisasmGetOperand(dctx) == 5) {
- parseParam = 0;
- parseState = 1;
- } else {
- parseState = 0;
- }
- }
- case OP_ADD_C: {
- if (parseState == 1) {
- parseParam = -DisasmGetOperand(dctx),
- parseState = 2;
- } else {
- parseState = 0;
- }
- }
- case OP_SCTRL: {
- // This is the tricky one, since it can mess up the stack in
- // strange ways. Deal with the case where it comes from
- // "goto", even though that is generally considered bad.
- switch (DisasmGetOperand(dctx)) {
- case 2: {
- // Heap written directly - size no longer known.
- hea = cellmin;
- }
- case 4: {
- switch (parseState) {
- case 1: {
- // "LCTRL 5; SCTRL 4" - no locals in scope.
- stk = 0;
- }
- case 2: {
- // "LCTRL 5; ADD.C n; SCTRL 4" - the negated "n".
- stk = parseParam;
- }
- default: {
- // Stack written from an unrecognised source.
- stk = cellmin;
- }
- }
- }
- case 5: {
- stk = cellmin;
- }
- }
- parseState = 0;
- }
- case OP_JUMP, OP_JZER, OP_JNZ, OP_JEQ, OP_JNEQ, OP_JLESS, OP_JLEQ, OP_JGRTR, OP_JGEQ, OP_JSLESS, OP_JSLEQ, OP_JSGRTR, OP_JSGEQ: {
- // Add a jump target. These require relocation as they are
- // translated to absolute RAM locations. "DisasmNeedReloc"
- // will return "true", but we don't need to call it.
- // Relocate it relative to "dat" not "cod" for simpler
- // comparisons - just see if the read address matches
- // instead of the true code address.
- //
- // val = val - (base + cod) + (cod - dat);
- // val = val - base - cod + cod - dat;
- // val = val - base - dat;
- // val = val - (base + dat);
- // base = base + dat;
- // val = val - base;
- //
- // Only jumps that go forwards.
- parseParam = DisasmGetOperand(dctx) - gBase,
- parseState = 0;
- if (parseParam > cip) {
- CodeScanAddJumpTarget(parseParam, stk, hea, csState);
- }
- }
- case OP_JREL: {
- // Add a jump target. Only jumps that go forwards.
- parseParam = DisasmGetOperand(dctx) + cip,
- parseState = 0;
- if (parseParam > cip) {
- CodeScanAddJumpTarget(parseParam, stk, hea, csState);
- }
- }
- case OP_SWITCH: {
- // Add a jump target. These are always forwards.
- CodeScanAddSwitchTarget(dctx, stk, hea, csState),
- parseState = 0;
- }
- default: {
- parseState = 0;
- }
- }
- // Publish the sizes as they are AFTER this instruction.
- csState[CodeScanMatch_stack] = stk,
- csState[CodeScanMatch_heap] = hea;
- }
- case DISASM_DONE: {
- return false;
- }
- case DISASM_NOP: {
- parseState = 0;
- }
- }
- // Any other disassembler result falls through to here unchanged.
- return true;
- }
- // Public single-step entry point: advances the scanner by one instruction,
- // keeping the parse state inside "csState" itself so that the caller only
- // has to hold on to the two arrays between calls. Returns "false" when
- // there is nothing left to disassemble.
- stock bool:CodeScanStep(dctx[DisasmContext], csState[CodeScanner]) {
-     return CodeScanStepInternal(
-         dctx,
-         csState,
-         csState[CodeScanner_state],
-         csState[CodeScanner_param]
-     );
- }
- // Calls the matcher's callback by address and returns whatever it returns.
- // The callback receives "csState", plus the matcher's user data as a second
- // parameter when "SCANNER_HAS_USER_DATA" is set.
- //
- // Both branches hand-build a call: push the arguments, push their size in
- // bytes, push a return address computed from the current instruction
- // pointer, then jump by writing the callback address to control register 6.
- // The "STOR.S.pri func" after the jump stores the callback's result in
- // "func", which is then returned.
- //
- // NOTE(review): the constants below look like they assume 4-byte cells
- // ("PUSH.C 8" / "PUSH.C 4" as two / one cell of arguments, and "ADD.C 36"
- // as the distance from the end of "LCTRL 6" to the "STOR.S.pri") - confirm
- // before using with another cell size. "LCTRL 8" is not one of the control
- // registers this file otherwise relies on; presumably it is a hook for
- // address relocation provided by the host - TODO confirm.
- static stock CodeScanCall(cs[CodeScanMatcher], csState[CodeScanner]) {
- // If I wrote way more assembly I could get away with not calling
- // `CodeScanDeref(cur)` below, and not need to assign `param` to a variable
- // before pushing it. But I'm not going to - it isn't worth the effort.
- new
- func = cs[CodeScanMatcher_func];
- if (cs[CodeScanMatcher_flags] & SCANNER_HAS_USER_DATA) {
- new
- param = cs[CodeScanMatcher_user_data];
- // Arguments are pushed last-to-first: user data, then the scanner.
- #emit PUSH.S param
- #emit PUSH.S csState
- #emit PUSH.C 8
- #emit LCTRL 6
- #emit ADD.C 36
- #emit LCTRL 8
- #emit PUSH.pri
- #emit LOAD.S.pri func
- #emit SCTRL 6
- #emit STOR.S.pri func
- } else {
- // Same sequence with the scanner as the only argument.
- #emit PUSH.S csState
- #emit PUSH.C 4
- #emit LCTRL 6
- #emit ADD.C 36
- #emit LCTRL 8
- #emit PUSH.pri
- #emit LOAD.S.pri func
- #emit SCTRL 6
- #emit STOR.S.pri func
- }
- return func;
- }
- // Runs every registered matcher over the code in a single pass, invoking a
- // matcher's callback each time its pattern completes. The callback's return
- // value selects what happens next:
- //
- //   -1    - skip this match: reset only that matcher and carry on with the
- //           remaining matchers for the same instruction.
- //    0    - reset every matcher and move on to the next instruction.
- //   other - as 0, but first rewind the disassembler to the start of the
- //           current function so that it is parsed again (for callbacks
- //           that wrote code).
- //
- // Always returns "true".
- stock bool:CodeScanRun(csState[CodeScanner]) {
-     if (csState[CodeScanner_first] == -1) {
-         // No matchers registered - nothing to do.
-         return true;
-     }
-     new
-         dctx[DisasmContext],
-         cur,
-         Opcode:op,
-         parseState = 4,
-         parseParam;
-     DisasmInit(dctx);
-     // "CodeScanReset" moves "cur" on to the next matcher, so each of these
-     // loops walks the whole list.
-     cur = csState[CodeScanner_first];
-     while (cur != -1) {
-         CodeScanReset(CodeScanDeref(cur), cur);
-     }
-     while (CodeScanStepInternal(dctx, csState, parseState, parseParam)) {
-         // Offer the instruction to each matcher in turn. A failed check
-         // moves "cur" on; a successful one leaves it on the matcher that
-         // fired.
-         op = DisasmGetOpcode(dctx);
-         cur = csState[CodeScanner_first];
-         while (cur != -1) {
-             if (!CodeScanCheck(op, dctx, CodeScanDeref(cur), csState, cur)) {
-                 continue;
-             }
-             new
-                 result = CodeScanCall(CodeScanDeref(cur), csState);
-             if (result == -1) {
-                 // Want to skip this match. However, it was a full match so
-                 // does need resetting.
-                 CodeScanReset(CodeScanDeref(cur), cur);
-                 continue;
-             }
-             if (result != 0) {
-                 // If code was written, reparse this function.
-                 dctx[DisasmContext_nip] = csState[CodeScanMatch_func];
-             }
-             // Throw away all partial matches before continuing.
-             cur = csState[CodeScanner_first];
-             while (cur != -1) {
-                 CodeScanReset(CodeScanDeref(cur), cur);
-             }
-             break;
-         }
-     }
-     return true;
- }
- // Prepares a scanner for use and refreshes the file-level AMX header cache.
- // Afterwards the scanner has no matchers, no pending jump targets, zeroed
- // stack / heap / type / name / parse state, and an unknown ("cellmin")
- // parameter count. No "start" and "end" parameters, so scans the entire
- // code range.
- stock CodeScanInit(scanner[CodeScanner]) {
-     // I debated inlining DisasmInit to avoid two calls to "GetAmxHeader",
-     // but it isn't worth the effort and code duplication.
-     GetAmxHeader(gHdr);
-     gBase = GetAmxBaseAddress() + gHdr[AMX_HDR_DAT];
-     gDat = gHdr[AMX_HDR_COD] - gHdr[AMX_HDR_DAT];
-     CodeScanResetJumpTargets(scanner);
-     scanner[CodeScanMatch_stack] = 0;
-     scanner[CodeScanMatch_heap] = 0;
-     scanner[CodeScanner_state] = 0;
-     scanner[CodeScanner_param] = 0;
-     scanner[CodeScanMatch_name] = 0;
-     scanner[CodeScanMatch_type] = 0;
-     scanner[CodeScanMatch_params] = cellmin;
-     scanner[CodeScanner_first] = -1;
- }
- // Sets up a fresh scanner ("ret") and disassembler context ("ctx") at the
- // start of the function containing the match in "csm", ready for stepping
- // with "CodeScanStep". Doesn't do any decompilation itself.
- stock CodeScanGetFunctionScanner(csm[CodeScanner], ret[CodeScanner], ctx[DisasmContext]) {
-     CodeScanInit(ret);
-     ctx[DisasmContext_end_ip] = 0;
-     ctx[DisasmContext_cip] = csm[CodeScanMatch_func];
-     ctx[DisasmContext_nip] = ctx[DisasmContext_cip];
-     ctx[DisasmContext_start_ip] = ctx[DisasmContext_nip];
-     // Functions that do not begin with "PROC" (and the "nothing scanned
-     // yet" type 0) need the initial parse state 4 for their first
-     // instruction to be recognised as a function start.
-     new
-         type = csm[CodeScanMatch_type];
-     if (type == 0 || type == SCANNER_FUNC_AUTOMATA || type == SCANNER_FUNC_HALT || type == SCANNER_FUNC_AUTOMATA_NO_NAME || type == SCANNER_FUNC_HALT_NO_NAME) {
-         ret[CodeScanner_state] = 4;
-     } else {
-         ret[CodeScanner_state] = 0;
-     }
- }
- // Sets up a fresh scanner ("ret") and disassembler context ("ctx")
- // positioned at the match described by "csm". Doesn't do any decompilation
- // of the match itself.
- //
- //   accurate - when "true", the function is re-scanned from its start up to
- //              the match so that "ret" holds the stack / heap sizes and
- //              jump targets that apply there. When "false" (the default)
- //              only the instruction pointers are moved, which is faster but
- //              leaves "ret" in its initial state.
- stock CodeScanGetMatchScanner(csm[CodeScanner], ret[CodeScanner], ctx[DisasmContext], bool:accurate = false) {
-     CodeScanGetFunctionScanner(csm, ret, ctx);
-     if (accurate) {
-         // To be accurate in terms of jump targets, we re-run the scanner over
-         // the function back up to this point.
-         while (ctx[DisasmContext_nip] < csm[CodeScanMatch_cip]) {
-             // Stop if the disassembler runs out of code first; otherwise
-             // this loop could never finish.
-             if (!CodeScanStepInternal(ctx, ret, ret[CodeScanner_state], ret[CodeScanner_param])) {
-                 break;
-             }
-         }
-     } else {
-         // For speed, we just change the current instruction pointers.
-         ctx[DisasmContext_start_ip] = ctx[DisasmContext_nip] = ctx[DisasmContext_cip] = csm[CodeScanMatch_cip];
-     }
- }
- // Points a disassembler context at the start of the function containing the
- // match, plus "offset". Only sets the positions; nothing is decoded.
- stock CodeScanGetFunctionDisasm(csm[CodeScanner], ctx[DisasmContext], offset = 0) {
-     new
-         where = csm[CodeScanMatch_func] + offset;
-     ctx[DisasmContext_end_ip] = 0;
-     ctx[DisasmContext_cip] = where;
-     ctx[DisasmContext_nip] = where;
-     ctx[DisasmContext_start_ip] = where;
- }
- // Points a disassembler context at the start of the match, plus "offset".
- // Only sets the positions; nothing is decoded.
- stock CodeScanGetMatchDisasm(csm[CodeScanner], ctx[DisasmContext], offset = 0) {
-     new
-         where = csm[CodeScanMatch_cip] + offset;
-     ctx[DisasmContext_end_ip] = 0;
-     ctx[DisasmContext_cip] = where;
-     ctx[DisasmContext_nip] = where;
-     ctx[DisasmContext_start_ip] = where;
- }
- // Initialises an assembler context for writing at the start of the function
- // containing the match, plus "offset". The size passed on is "cellmax".
- stock CodeScanGetFunctionAsm(csm[CodeScanner], ctx[AsmContext], offset = 0) {
-     new
-         where = csm[CodeScanMatch_func] + offset;
-     AsmInitPtr(ctx, where, cellmax);
- }
- // Initialises an assembler context for writing at the start of the match,
- // plus "offset". The size passed on is "cellmax".
- stock CodeScanGetMatchAsm(csm[CodeScanner], ctx[AsmContext], offset = 0) {
-     new
-         where = csm[CodeScanMatch_cip] + offset;
-     AsmInitPtr(ctx, where, cellmax);
- }
- // Address of the function containing the match, relative to "COD". (It is
- // stored relative to "DAT"; "gDat" is the difference between the two.)
- stock CodeScanGetMatchFunc(csm[CodeScanner]) {
-     new
-         dataRelative = csm[CodeScanMatch_func];
-     return dataRelative - gDat;
- }
- // Address of the start of the match, relative to "COD". (It is stored
- // relative to "DAT"; "gDat" is the difference between the two.)
- stock CodeScanGetMatchAddress(csm[CodeScanner]) {
-     new
-         dataRelative = csm[CodeScanMatch_cip];
-     return dataRelative - gDat;
- }
- // Address of the function containing the match, relative to "DAT" - i.e.
- // exactly as stored.
- stock CodeScanGetMatchFuncData(csm[CodeScanner]) {
-     new
-         dataRelative = csm[CodeScanMatch_func];
-     return dataRelative;
- }
- // Address of the start of the match, relative to "DAT" - i.e. exactly as
- // stored.
- stock CodeScanGetMatchAddressData(csm[CodeScanner]) {
-     new
-         dataRelative = csm[CodeScanMatch_cip];
-     return dataRelative;
- }
- // Size of the match: the distance from its first instruction to the end of
- // its last one, as recorded when the match completed.
- stock CodeScanGetMatchLength(csm[CodeScanner]) {
-     new
-         size = csm[CodeScanMatch_size];
-     return size;
- }
- // Returns the kind of function containing the match, resolving it (and the
- // function's name) on first request.
- //
- // There are four resolved types, all prefixed by "SCANNER_FUNC_":
- //
- //   PUBLIC   - Public functions.
- //   HALT     - The "halt" instructions at the very start.
- //   AUTOMATA - A state determining stub.
- //   OTHER    - A normal function.
- //
- // Only "PUBLIC" is guaranteed to have a name - the types are partially
- // determined in other ways ("OTHER" will never have a name). There is also
- // "0", which just means that nothing has been scanned yet.
- stock CodeScanGetMatchType(csm[CodeScanner]) {
-     if (csm[CodeScanMatch_type] < SCANNER_FUNC_UNKNOWN) {
-         // Already resolved (or nothing scanned yet).
-         return csm[CodeScanMatch_type];
-     }
-     csm[CodeScanMatch_name][0] = '\0';
-     if (CodeScanGetFuncName(csm[CodeScanMatch_func], csm[CodeScanMatch_name])) {
-         // Named: UNKNOWN -> PUBLIC, AUTOMATA_NO_NAME -> AUTOMATA,
-         // HALT_NO_NAME -> HALT.
-         csm[CodeScanMatch_type] -= 4;
-     } else {
-         // Unnamed: UNKNOWN -> OTHER, AUTOMATA_NO_NAME -> AUTOMATA,
-         // HALT_NO_NAME -> HALT.
-         //
-         // We could check for functions that are state implementations.
-         // Currently public functions with states will only get their names
-         // for the state stub, not for the various implementations.
-         csm[CodeScanMatch_type] /= 2;
-     }
-     return csm[CodeScanMatch_type];
- }
- // Tracked heap size at the scanner's current position. "cellmin" means the
- // size could not be determined.
- stock CodeScanGetMatchHeap(csm[CodeScanner]) {
-     new
-         heap = csm[CodeScanMatch_heap];
-     return heap;
- }
- // Tracked stack size at the scanner's current position. "cellmin" means the
- // size could not be determined.
- stock CodeScanGetMatchStack(csm[CodeScanner]) {
-     new
-         stack = csm[CodeScanMatch_stack];
-     return stack;
- }
- // Value captured by the "idx"-th "???" of the matched pattern (counting
- // from zero). "idx" is not range-checked here.
- stock CodeScanGetMatchHole(csm[CodeScanner], idx) {
-     new
-         captured = csm[CodeScanMatch_holes][idx];
-     return captured;
- }
- // Copies the name of the function containing the match into "name" (empty
- // when the function has no name). The name is resolved on first request.
- stock CodeScanGetMatchName(csm[CodeScanner], name[]) {
-     // The name is looked up as a side effect of resolving the type.
-     if (csm[CodeScanMatch_type] >= SCANNER_FUNC_UNKNOWN) {
-         CodeScanGetMatchType(csm);
-     }
-     // Empty the destination first so that "strcat" acts as a bounded copy.
-     name[0] = '\0';
-     strcat(name, csm[CodeScanMatch_name], 32);
- }
|