codescan.inc 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968
  1. // Copyright (C) 2016 Y_Less
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a
  4. // copy of this software and associated documentation files (the "Software"),
  5. // to deal in the Software without restriction, including without limitation
  6. // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  7. // and/or sell copies of the Software, and to permit persons to whom the
  8. // Software is furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  14. // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  19. // DEALINGS IN THE SOFTWARE.
  20. #if defined CODESCAN_INC
  21. #endinput
  22. #endif
  23. #define CODESCAN_INC
  24. /*
  25. // Example:
  26. forward TailCall_FoundCallback(m[CodeScanner])
  27. main() {
  28. new scanner[CodeScanner];
  29. CodeScanInit(scanner);
  30. new csm0[CodeScanMatcher];
  31. CodeScanMatcherInit(csm0, &TailCall_FoundCallback);
  32. CodeScanMatcherPattern(csm0,
  33. OP(PUSH_C, ???)
  34. OP(CALL, &MyFunc)
  35. OP(RETN)
  36. );
  37. CodeScanAddMatcher(scanner, csm0);
  38. // Add other matcher patterns here.
  39. // Run all the scanners in parallel.
  40. CodeScanRun(scanner);
  41. }
  42. public TailCall_FoundCallback(m[CodeScanner]) {
  43. // Do something with the found address (of the START of the match), and the
  44. // stack size (of the END of the match) - different for reasons...
  45. }
  46. // Create a default call for this function, so that we can include it in the AMX
  47. // and take the address in "OP". Note that you do NOT need to do this for
  48. // scanner callbacks if you only use their address in "CodeScanMatcherInit".
  49. #define CALL@MyFunc MyFunc(0, "")
  50. stock MyFunc(a, b[], ...) {
  51. // Normal function.
  52. }
  53. */
  54. #include <core>
  55. #include "frame_info"
  56. #include "disasm"
  57. #include "asm"
  58. #include "addressof"
  // Matcher option flags. They are OR-ed together and stored in
  // `CodeScanMatcher_flags` by `CodeScanMatcherInit_`.
  59. #define SCANNER_FAIL_ON_INVALID (1) // NOTE(review): not referenced by the code visible in this file.
  60. #define SCANNER_IGNORE_NOP (2) // `CodeScanCheck` skips `OP_NOP` for matchers with this flag.
  61. #define SCANNER_IGNORE_BREAK (4) // `CodeScanCheck` skips `OP_BREAK` for matchers with this flag.
  62. #define SCANNER_NAME_FUNCTIONS (8) // NOTE(review): not referenced by the code visible in this file.
  63. #define SCANNER_IGNORE_HALT (16) // `CodeScanCheck` skips `OP_HALT` for matchers with this flag.
  64. #define SCANNER_IGNORE_BOUNDS (32) // `CodeScanCheck` skips `OP_BOUNDS` for matchers with this flag.
  65. #define SCANNER_HAS_USER_DATA (64) // Set by `CodeScanMatcherData`; makes `CodeScanCall` pass the user data.
  // Kinds of cell pairs stored in a matcher pattern. Each pattern entry is a
  // (kind, value) pair; the short `O@x_` names are what the `OP()` macro emits.
  66. #define O@I_ (0) // Type integer.
  67. #define O@U_ (1) // Type unknown (???).
  68. #define O@F_ (2) // Type function (&func).
  69. #define O@O_ (4) // Type opcode.
  70. #define O@S_ (5) // Type skipped.
  // Readable aliases for the values above, used by the scanner code itself.
  71. #define OP_TYPE_INTEGER_ (O@I_) // Type integer.
  72. #define OP_TYPE_UNKNOWN_ (O@U_) // Type unknown (???).
  73. #define OP_TYPE_FUNCTION_ (O@F_) // Type function (&func).
  74. #define OP_TYPE_OPCODE_ (O@O_) // Type opcode.
  75. #define OP_TYPE_SKIP_ (O@S_) // Type skipped.
  76. // If we can determine a function's name, we can determine if it is a public or
  77. // not. If we can't name it, it is a normal one. However, if naming is skipped
  78. // then we will have no idea what type it is.
  // These values are stored in `CodeScanMatch_type`.
  79. #define SCANNER_FUNC_PUBLIC (1)
  80. #define SCANNER_FUNC_OTHER (2)
  81. #define SCANNER_FUNC_AUTOMATA (3)
  82. #define SCANNER_FUNC_HALT (4)
  83. #define SCANNER_FUNC_UNKNOWN (5) // Assigned when an `OP_PROC` is seen.
  84. #define SCANNER_FUNC_AUTOMATA_NO_NAME (7) // Assigned when `OP_LOAD_PRI` is the first instruction seen.
  85. #define SCANNER_FUNC_HALT_NO_NAME (8) // Assigned when `OP_HALT` is the first instruction seen.
  86. // The "OP()" macro is used to easily define code patterns to scan for:
  87. //
  88. // new csm[CodeScanMatcher];
  89. // CodeScanMatcherInit(csm, &callback);
  90. // CodeScanMatcherPattern(csm,
  91. // OP(CONST_PRI, 42)
  92. // OP(ADD_C, ???)
  93. // OP(CALL, &my_func)
  94. // )
  95. //
  96. // Any function that you want to take the address of in this way must have its
  97. // call pattern defined as:
  98. //
  99. // #define CALL@my_func my_func(0, "hi", false)
  100. //
  101. // Because otherwise a) the code can't guarantee that the function will be in
  102. // the final amx, and b) we need a call to it from which to extract the addr.
  103. //
  104. // You can use this style explicitly within an "OP" scanner, or there is a new
  105. // dedicated keyword for it - "addressof(func)" (note the lack of "&" there).
  106. //
  // Each `OP(...)` expands to a LEADING comma followed by a (kind, value)
  // pair for the opcode, then one (kind, value) pair per operand. The
  // leading comma is why `CodeScanMatcherPattern(` has no trailing comma.
  107. #define OP(%0) ,(_:O@T_:O@O_),(Opcode:O@X_:O@Y_:O@W_:$OP_%0)
  // Removes a space inside the argument list so the later macros can match
  // (`\32;` is the space character).
  108. #define OP_%0\32;%1) OP_%0%1)
  // Argument splitters: `O@X_` matches "a, b, c" (peels one operand and keeps
  // the rest), `O@Y_` matches "a, b" (last operand), `O@Z_`/`O@W_` match a
  // single remaining item and end the expansion.
  109. #define O@X_:%9$%0,%1,%2) %0),(_:O@1_:O@2_:O@3_:$%1|||,%2)
  110. #define O@Y_:%9$%0,%1) %0),(_:O@1_:O@2_:O@3_:$%1|||)
  111. #define O@Z_:%9$%0) %0)
  112. #define O@W_:%9$%0) %0)
  // `OP(???)` - an unknown instruction - becomes a skip entry (`O@S_`, 0).
  113. #define O@T_:O@O_),(Opcode:O@X_:O@Y_:O@W_:$OP_???%0) O@S_),(0)
  // Operand classifiers, tried in order: `???` becomes an unknown entry
  // (`O@U_`, 0), `&func` becomes a function entry (`O@F_`) and anything else
  // becomes an integer entry (`O@I_`).
  114. #define O@1_:%9$%0???%1|||%2) O@U_ ),(_:O@X_:O@Y_:O@Z_:$0%2)
  // NOTE(review): `O@A_`/`O@V_` and `CALL@func` are defined elsewhere
  // (presumably by the "addressof" include) - confirm before changing.
  115. #define O@2_:%9$%0&%1|||%2) O@F_),(O@A_()?(((CALL@%1),O@V_)?1:2):_:O@X_:O@Y_:O@Z_:$(O@V_)%2)
  116. #define O@3_:%9$%1|||%2) O@I_ ),(_:O@X_:O@Y_:O@Z_:$(%1)%2)
  // Size of one cell in bytes, unless something else already defined it.
  117. #if !defined cellbytes
  118. #define cellbytes (cellbits / 8)
  119. #endif
  // Pattern size limit. Define it before including this file to override it.
  120. #if !defined CODE_SCAN_MAX_PATTERN
  121. #define CODE_SCAN_MAX_PATTERN (16)
  122. #endif
  // Cells reserved for a stored pattern, and "???" results kept per match.
  123. #define CODE_SCAN_MAX_PATTERN_ARRAY (CODE_SCAN_MAX_PATTERN * 4)
  124. #define CODE_SCAN_MAX_HOLES (CODE_SCAN_MAX_PATTERN / 2)
  // Number of partial matches tracked at once for each matcher.
  125. #if !defined CODE_SCAN_MAX_PARALLEL
  126. #define CODE_SCAN_MAX_PARALLEL (2)
  127. #endif
  // Number of pending jump / switch targets remembered per function.
  128. #if !defined CODE_SCAN_MAX_JUMP_TARGETS
  129. #define CODE_SCAN_MAX_JUMP_TARGETS (32)
  130. #endif
  131. // All the information for scanning through an AMX and extracting lots of nice
  132. // information about it.
  // The `CodeScanMatch_*` fields describe the current position / latest match
  // and are what callbacks read; the `CodeScanner_*` fields are internal
  // bookkeeping for the scan itself.
  133. enum CodeScanner {
  134. CodeScanMatch_func, // Start of the containing function.
  135. CodeScanMatch_size, // Size of the match.
  136. CodeScanMatch_type, // Public, normal, automata, etc. (a `SCANNER_FUNC_*` value, or 0).
  137. CodeScanMatch_heap, // At the point of this scanner (`cellmin` once it can't be tracked).
  138. CodeScanMatch_stack, // At the point of this scanner (`cellmin` once it can't be tracked).
  139. CodeScanMatch_params, // Likely unknown statically.
  140. CodeScanMatch_cip, // The point of the pattern match.
  141. CodeScanMatch_holes[CODE_SCAN_MAX_HOLES], // Results of "???"s.
  142. CodeScanMatch_name[32 char], // NOTE(review): only cleared by `CodeScanInit` in the code visible here.
  143. CodeScanner_first, // Address of the first matcher in the linked list, -1 when empty.
  144. CodeScanner_minn, // Lowest jump-target slot in use; bounds the search in `CodeScanCheckJumpTarget`.
  145. CodeScanner_jump_switch[CODE_SCAN_MAX_JUMP_TARGETS], // For "CASETBL" not regular jumps.
  146. CodeScanner_jump_target[CODE_SCAN_MAX_JUMP_TARGETS], // Zero when this slot is available.
  147. CodeScanner_jump_stack [CODE_SCAN_MAX_JUMP_TARGETS], // Sizes at the time of the jump.
  148. CodeScanner_jump_heap [CODE_SCAN_MAX_JUMP_TARGETS], // Sizes at the time of the jump.
  149. CodeScanner_state, // Parser state kept between `CodeScanStep` calls.
  150. CodeScanner_param // Parser parameter kept between `CodeScanStep` calls.
  151. }
  // One pattern to look for, plus the state of its partial matches. Matchers
  // are chained into a linked list through `CodeScanMatcher_next`.
  152. enum CodeScanMatcher {
  153. CodeScanMatcher_func, // A pointer to the callback.
  154. CodeScanMatcher_user_data, // User data to pass to their callback.
  155. CodeScanMatcher_code[CODE_SCAN_MAX_PATTERN_ARRAY], // The code to look for.
  156. CodeScanMatcher_len, // Cells of `CodeScanMatcher_code` in use; 0 means no pattern.
  157. CodeScanMatcher_offset[CODE_SCAN_MAX_PARALLEL], // Where the current scanner is in this code.
  158. CodeScanMatcher_start[CODE_SCAN_MAX_PARALLEL], // Instruction address where each partial match began.
  159. CodeScanMatcher_holeidx[CODE_SCAN_MAX_PARALLEL], // Number of "???" values captured so far per partial match.
  160. CodeScanMatcher_holes[CODE_SCAN_MAX_PARALLEL * CODE_SCAN_MAX_HOLES], // Captured "???" values, `CODE_SCAN_MAX_HOLES` per partial match.
  161. CodeScanMatcher_next, // The next match array.
  162. CodeScanMatcher_flags // Customisation.
  163. }
  164. // This macro is to let anyone use `&callback` for a scanner callback without
  165. // having to define the `CALL@...` macro for the required parameters (since we
  166. // know to call scanner callbacks in this code).
  // NOTE(review): `O@A_` and `O@V_` come from elsewhere (presumably the
  // "addressof" include); the fake call only needs an array of the right tag.
  167. #define addressof_ScannerCallback_(%1) (O@A_()?(((%1((gCodeScanCallback_match))),O@V_)?1:2):(O@V_))
  // Dummy argument for the fake callback call in the macro above.
  168. stock
  169. gCodeScanCallback_match[CodeScanner];
  // File-local values filled in by `CodeScanInit`: the AMX header, the base
  // address plus the header's DAT offset, and the COD offset minus the DAT
  // offset.
  170. static stock
  171. gHdr[AMX_HDR],
  172. gBase,
  173. gDat;
  // Checks whether the current instruction is the destination of a jump or
  // switch that was recorded earlier in the same function.
  //
  //   cip         - Address of the current instruction, compared against
  //                 plain jump targets.
  //   deloc       - The same address in the form stored inside case tables,
  //                 compared against every entry of a recorded switch.
  //   stk, hea    - On a hit these receive the stack and heap sizes that were
  //                 recorded when the jump was seen.
  //   jumpTargets - The scanner holding the target table.
  //   num         - One past the highest slot to look at.
  //
  // Only slots from `CodeScanner_minn` upwards are examined. A plain target
  // is released (zeroed) when hit; a switch slot has its counter decremented.
  // Returns "true" on a hit.
  static stock bool:CodeScanCheckJumpTarget(cip, deloc, &stk, &hea, jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
      new
          lowest = jumpTargets[CodeScanner_minn];
      for (new slot = num; slot-- > lowest; ) {
          if (!jumpTargets[CodeScanner_jump_target][slot]) {
              // Unused slot.
              continue;
          }
          new
              table = jumpTargets[CodeScanner_jump_switch][slot];
          if (table) {
              // A switch: the first cell is the record count, and one extra
              // address precedes the records.
              new
                  remaining = ReadAmxMemory(table) + 1;
              table += cellbytes;
              while (remaining--) {
                  if (ReadAmxMemory(table) == deloc) {
                      --jumpTargets[CodeScanner_jump_target][slot];
                      stk = jumpTargets[CodeScanner_jump_stack][slot];
                      hea = jumpTargets[CodeScanner_jump_heap][slot];
                      return true;
                  }
                  // Step over the case value to the next address.
                  table += 2 * cellbytes;
              }
          } else if (jumpTargets[CodeScanner_jump_target][slot] == cip) {
              jumpTargets[CodeScanner_jump_target][slot] = 0;
              stk = jumpTargets[CodeScanner_jump_stack][slot];
              hea = jumpTargets[CodeScanner_jump_heap][slot];
              return true;
          }
      }
      return false;
  }
  // Forgets every recorded jump target: all `num` slots are marked free and
  // `CodeScanner_minn` is moved past the end so that no slot gets searched.
  static stock CodeScanResetJumpTargets(jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
      jumpTargets[CodeScanner_minn] = num;
      for (new slot = num; slot-- != 0; ) {
          jumpTargets[CodeScanner_jump_target][slot] = 0;
      }
  }
  // Records the destination of a forward jump, together with the stack and
  // heap sizes at the time of the jump, so that they can be restored when the
  // scan reaches that address.
  //
  //   cip         - Destination address of the jump.
  //   stk, hea    - Stack and heap sizes to remember.
  //   jumpTargets - The scanner holding the target table.
  //   num         - Number of slots in the table.
  //
  // The whole table is checked for an existing entry for `cip` before a free
  // slot is claimed; stopping at the first free slot would record the same
  // destination twice whenever a higher slot had been released earlier. If
  // the table is full the target is silently dropped.
  static stock CodeScanAddJumpTarget(cip, stk, hea, jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
      new
          freeSlot = -1;
      while (num--) {
          if (jumpTargets[CodeScanner_jump_target][num] == cip) {
              // Multiple jumps to the same place - already recorded.
              return;
          }
          if (freeSlot == -1 && !jumpTargets[CodeScanner_jump_target][num]) {
              // Remember the highest free slot, but keep looking for a
              // duplicate in the remaining slots.
              freeSlot = num;
          }
      }
      if (freeSlot == -1) {
          // No room left.
          return;
      }
      jumpTargets[CodeScanner_jump_switch][freeSlot] = 0;
      jumpTargets[CodeScanner_jump_target][freeSlot] = cip;
      jumpTargets[CodeScanner_jump_stack][freeSlot] = stk;
      jumpTargets[CodeScanner_jump_heap][freeSlot] = hea;
      jumpTargets[CodeScanner_minn] = min(jumpTargets[CodeScanner_minn], freeSlot);
  }
  // Records the case table of a "SWITCH" instruction so that every address
  // in it is later recognised by `CodeScanCheckJumpTarget`.
  //
  //   dctx        - Disassembler context positioned on the "SWITCH".
  //   stk, hea    - Stack and heap sizes to remember for all the cases.
  //   jumpTargets - The scanner holding the target table.
  //   num         - Number of slots in the table.
  //
  // The slot stores the address of the table's count cell in
  // `CodeScanner_jump_switch` and "count + 1" in `CodeScanner_jump_target`.
  // Nothing is recorded when the operand does not point at a "CASETBL" or
  // when the table is full.
  static stock CodeScanAddSwitchTarget(dctx[DisasmContext], stk, hea, jumpTargets[CodeScanner], num = CODE_SCAN_MAX_JUMP_TARGETS) {
      new
          tableAddr = DisasmGetOperand(dctx) - gBase,
          codeOffset = tableAddr + gHdr[AMX_HDR_DAT] - gHdr[AMX_HDR_COD];
      // Can happen when we parse "RelocateOpcodeNow" because it has an
      // explicit "#emit switch 0" in. The range test must come first so that
      // memory is only read for plausible addresses.
      if (codeOffset < 0 || codeOffset > gHdr[AMX_HDR_DAT]) {
          return;
      }
      if (UnrelocateOpcode(Opcode:ReadAmxMemory(tableAddr)) != OP_CASETBL) {
          return;
      }
      for (new slot = num; slot-- != 0; ) {
          if (jumpTargets[CodeScanner_jump_target][slot]) {
              // Slot in use, try the next one down.
              continue;
          }
          jumpTargets[CodeScanner_jump_switch][slot] = tableAddr + cellbytes;
          jumpTargets[CodeScanner_jump_target][slot] = ReadAmxMemory(tableAddr + cellbytes) + 1;
          jumpTargets[CodeScanner_jump_stack][slot] = stk;
          jumpTargets[CodeScanner_jump_heap][slot] = hea;
          jumpTargets[CodeScanner_minn] = min(jumpTargets[CodeScanner_minn], slot);
          return;
      }
  }
  // Abandons every partial match of one matcher and reports which matcher
  // follows it in the list.
  //
  //   cs   - The matcher to reset.
  //   next - Receives `cs[CodeScanMatcher_next]`.
  //
  // A matcher without a callback also has its pattern length cleared, which
  // stops `CodeScanCheck` from ever matching it.
  static stock CodeScanReset(cs[CodeScanMatcher], &next) {
      next = cs[CodeScanMatcher_next];
      for (new slot = 0; slot != CODE_SCAN_MAX_PARALLEL; ++slot) {
          cs[CodeScanMatcher_offset][slot] = 0;
          cs[CodeScanMatcher_holeidx][slot] = 0;
      }
      if (cs[CodeScanMatcher_func] == 0) {
          cs[CodeScanMatcher_len] = 0;
      }
  }
  // Pushes `searcher` onto the front of the scanner's list of matchers. The
  // list stores addresses (from `ref()`), so `searcher` must stay alive for
  // as long as the scanner is used.
  stock CodeScanAddMatcher(scanner[CodeScanner], searcher[CodeScanMatcher]) {
      new
          oldHead = scanner[CodeScanner_first];
      searcher[CodeScanMatcher_next] = oldHead;
      scanner[CodeScanner_first] = ref(searcher);
  }
  // Prepares a matcher for use. Normally reached through the
  // `CodeScanMatcherInit(matcher, &callback)` macro, which turns the callback
  // name into the code address passed here.
  //
  //   searcher - The matcher to initialise.
  //   address  - Code address of the callback to run on a match.
  //   flags    - `SCANNER_*` options. `SCANNER_HAS_USER_DATA` is always
  //              stripped; it can only be set via `CodeScanMatcherData`.
  //
  // The matcher starts with no pattern, no user data and no successor.
  stock CodeScanMatcherInit_(searcher[CodeScanMatcher], address, flags = SCANNER_IGNORE_NOP | SCANNER_IGNORE_BOUNDS | SCANNER_IGNORE_BREAK | SCANNER_IGNORE_HALT) {
      searcher[CodeScanMatcher_func] = address;
      searcher[CodeScanMatcher_flags] = flags & ~SCANNER_HAS_USER_DATA;
      searcher[CodeScanMatcher_next] = -1;
      searcher[CodeScanMatcher_len] = 0;
      searcher[CodeScanMatcher_user_data] = 0;
      // `flags` is finished with, so it doubles as the throw-away variable
      // for the "next matcher" output that is not needed here.
      CodeScanReset(searcher, flags);
  }
  // Attaches one cell of user data to a matcher; the callback then receives
  // it as an extra argument. Use `ref()` to pass an array.
  stock CodeScanMatcherData(searcher[CodeScanMatcher], val) {
      searcher[CodeScanMatcher_user_data] = val;
      searcher[CodeScanMatcher_flags] |= SCANNER_HAS_USER_DATA;
  }
  286. // Will not call the function because the check will fail, but will not compile
  287. // if the function doesn't exist, while still passing its address on.
  // The second macro moves optional trailing arguments (the flags) out of the
  // `addressof_ScannerCallback_(...)` wrapper and back into the real call.
  288. #define CodeScanMatcherInit(%0,&%1) CodeScanMatcherInit_((%0),addressof_ScannerCallback_(%1))
  289. #define CodeScanMatcherInit_(%0,addressof_ScannerCallback_(%1,%2)) CodeScanMatcherInit_(%0,addressof_ScannerCallback_(%1),%2)
  // Stores the pattern a matcher looks for. Normally reached through the
  // `CodeScanMatcherPattern(matcher, OP(...) OP(...))` macro; the variable
  // arguments are the (kind, value) pairs produced by `OP()`.
  //
  // Returns:
  //    0 - Success; the pattern length is stored.
  //   -1 - More cells than `CODE_SCAN_MAX_PATTERN_ARRAY`.
  //   -2 - An odd number of cells, so not made of pairs.
  //   >0 - A pair was of the wrong kind, or an opcode had too many or too
  //        few operands; the value identifies the pair (`index / 2 + 1`).
  //
  // On failure the stored length is left untouched, though some pattern cells
  // may already have been overwritten.
  stock CodeScanMatcherPattern_(searcher[CodeScanMatcher], {Opcode, Float, _}:...) {
      new
          total = numargs() - 1;
      if (total > CODE_SCAN_MAX_PATTERN_ARRAY) {
          return -1;
      }
      if (total & 0x01) {
          // Not a multiple of 2 in the scanner.
          return -2;
      }
      new
          pos = 0;
      while (pos != total) {
          new
              kind = getarg(pos + 1),
              Opcode:opcode = Opcode:getarg(pos + 2);
          searcher[CodeScanMatcher_code][pos + 0] = kind;
          searcher[CodeScanMatcher_code][pos + 1] = _:opcode;
          pos += 2;
          if (kind == OP_TYPE_SKIP_) {
              // A skipped instruction has no operands to collect.
              continue;
          }
          if (kind != OP_TYPE_OPCODE_) {
              // Incorrect pair kind. Return the op where it happened.
              return pos / 2 + 1;
          }
          // Collect the operand pairs that follow this opcode. Variable ops
          // like `CASETBL` report a negative count, so never run out.
          new
              remaining = GetOpcodeInstructionParameters(opcode);
          while (pos != total) {
              new
                  argKind = getarg(pos + 1);
              if (argKind != OP_TYPE_INTEGER_ && argKind != OP_TYPE_UNKNOWN_ && argKind != OP_TYPE_FUNCTION_) {
                  // The next pair starts a new instruction.
                  break;
              }
              if (remaining == 0) {
                  // Got an unexpected operand.
                  return pos / 2 + 1;
              }
              --remaining;
              searcher[CodeScanMatcher_code][pos + 0] = argKind;
              searcher[CodeScanMatcher_code][pos + 1] = getarg(pos + 2);
              pos += 2;
          }
          if (remaining > 0) {
              // Missing a required (non-optional) operand.
              return pos / 2 + 1;
          }
      }
      searcher[CodeScanMatcher_len] = total;
      // No error.
      return 0;
  }
  349. // Note the lack of trailing comma. This is to make the code patterns work:
  // every `OP(...)` expansion begins with its own comma, so the first one
  // supplies the separator after the matcher argument.
  350. #define CodeScanMatcherPattern(%0, CodeScanMatcherPattern_(%0
  // Turns a matcher address (as stored in `CodeScanner_first` and
  // `CodeScanMatcher_next`) back into something that can be passed as a
  // `CodeScanMatcher` array argument. It copies the first argument into the
  // hidden slot used for array returns and returns immediately, so the caller
  // sees `v` as the returned array.
  // NOTE(review): offsets 12 and 16 are the frame offsets named in the
  // original comments below; they presumably assume 4-byte cells - confirm.
  351. static stock CodeScanDeref(v) {
  352. static
  353. lFakeMatcher[CodeScanMatcher];
  354. #pragma unused v
  355. #emit load.s.pri 12 // First argument.
  356. #emit stor.s.pri 16 // Secret argument.
  357. #emit retn
  // Never reached at run-time because of the "retn" above.
  358. return lFakeMatcher; // Make compiler happy, and teach it the array return.
  359. }
  // Feeds one disassembled instruction to one matcher and advances its
  // partial matches.
  //
  //   op   - Opcode of the current instruction.
  //   dctx - Disassembler context positioned on that instruction.
  //   cs   - The matcher to advance.
  //   fctx - The scanner; receives the match details on success.
  //   next - Receives the address of the next matcher when "false" is
  //          returned. It is NOT written on success, so the caller can still
  //          reach this matcher.
  //
  // Returns "true" when the instruction completes the pattern. In that case
  // `CodeScanMatch_holes`, `CodeScanMatch_cip` (START of the match) and
  // `CodeScanMatch_size` are filled in. The matcher is not reset here.
  //
  // At most `CODE_SCAN_MAX_HOLES` "???" values are kept per partial match;
  // any further ones still match but their values are dropped.
  static stock bool:CodeScanCheck(Opcode:op, dctx[DisasmContext], cs[CodeScanMatcher], fctx[CodeScanner], &next) {
      if (!cs[CodeScanMatcher_len]) {
          // No pattern, so nothing to match.
          next = cs[CodeScanMatcher_next];
          return false;
      }
      new
          bool:zero = true,
          off = cs[CodeScanMatcher_flags];
      if (off) {
          // To deal with differences in different compilation modes, we just
          // mark these opcodes as fully ignorable (because they are mostly
          // used for debugging and not real user code).
          switch (op) {
              case OP_NOP: {
                  if (off & SCANNER_IGNORE_NOP) {
                      next = cs[CodeScanMatcher_next];
                      return false;
                  }
              }
              case OP_BOUNDS: {
                  if (off & SCANNER_IGNORE_BOUNDS) {
                      next = cs[CodeScanMatcher_next];
                      return false;
                  }
              }
              case OP_BREAK: {
                  if (off & SCANNER_IGNORE_BREAK) {
                      next = cs[CodeScanMatcher_next];
                      return false;
                  }
              }
              case OP_HALT: {
                  if (off & SCANNER_IGNORE_HALT) {
                      next = cs[CodeScanMatcher_next];
                      return false;
                  }
              }
          }
      }
      new
          cnt = DisasmGetNumOperands(dctx),
          len = cs[CodeScanMatcher_len];
      for (new idx = 0; idx != CODE_SCAN_MAX_PARALLEL; ++idx) {
          // From here on "off" is the position of this partial match within
          // the pattern.
          off = cs[CodeScanMatcher_offset][idx];
          // Ensure that only one of the parallel scanners starts from the
          // beginning on each instruction.
          if (off) {
              // Already part-way through a match.
          } else if (zero) {
              // Get the start point of this match.
              cs[CodeScanMatcher_start][idx] = DisasmGetCurIp(dctx);
              zero = false;
          } else {
              continue;
          }
          if (cs[CodeScanMatcher_code][off] == OP_TYPE_SKIP_) {
              off += 2;
              if (off == len) {
                  // The pattern ends with a skip, so any instruction
                  // completes it. Copy this partial match's own holes to the
                  // start of the result array.
                  memcpy(fctx[CodeScanMatch_holes], cs[CodeScanMatcher_holes][idx * CODE_SCAN_MAX_HOLES], 0, cs[CodeScanMatcher_holeidx][idx] * cellbytes, CODE_SCAN_MAX_HOLES);
                  fctx[CodeScanMatch_cip] = cs[CodeScanMatcher_start][idx];
                  fctx[CodeScanMatch_size] = DisasmGetNextIp(dctx) - cs[CodeScanMatcher_start][idx];
                  return true;
              } else if (cs[CodeScanMatcher_code][off] == OP_TYPE_OPCODE_ && Opcode:cs[CodeScanMatcher_code][off + 1] == op) {
                  // Found the match after the current "missing" instruction.
                  goto CodeScanCheck_pass;
              } else {
                  // The "== op" check is done twice because in this case we
                  // don't want to fail the scanner if it doesn't match. The
                  // stored offset still points at the skip.
                  continue;
              }
          }
          if (cs[CodeScanMatcher_code][off] == OP_TYPE_OPCODE_ && Opcode:cs[CodeScanMatcher_code][off + 1] == op) {
  CodeScanCheck_pass:
              // The opcode matched, now compare every operand.
              off += 2;
              for (new i = 0; i != cnt; ++i) {
                  switch (cs[CodeScanMatcher_code][off++]) {
                      // Because we now abstract relocations to the disasm
                      // system, we don't need to differentiate between fixed
                      // parameters and function parameters any more - they
                      // are always fully resolved.
                      case OP_TYPE_INTEGER_, OP_TYPE_FUNCTION_: {
                          if (cs[CodeScanMatcher_code][off++] != DisasmGetOperandReloc(dctx, i)) {
                              goto CodeScanCheck_fail;
                          }
                      }
                      case OP_TYPE_UNKNOWN_: {
                          // Save the parameter, unless this partial match has
                          // already used all of its hole slots.
                          ++off;
                          if (cs[CodeScanMatcher_holeidx][idx] < CODE_SCAN_MAX_HOLES) {
                              cs[CodeScanMatcher_holes][idx * CODE_SCAN_MAX_HOLES + cs[CodeScanMatcher_holeidx][idx]++] = DisasmGetOperandReloc(dctx, i);
                          }
                      }
                      case OP_TYPE_OPCODE_, OP_TYPE_SKIP_: {
                          // The pattern has fewer operands than the code.
                          goto CodeScanCheck_fail;
                      }
                  }
              }
              if (off == len) {
                  // Complete. Get the address of the START of the match.
                  memcpy(fctx[CodeScanMatch_holes], cs[CodeScanMatcher_holes][idx * CODE_SCAN_MAX_HOLES], 0, cs[CodeScanMatcher_holeidx][idx] * cellbytes, CODE_SCAN_MAX_HOLES);
                  fctx[CodeScanMatch_cip] = cs[CodeScanMatcher_start][idx];
                  fctx[CodeScanMatch_size] = DisasmGetNextIp(dctx) - cs[CodeScanMatcher_start][idx];
                  return true;
              } else switch (cs[CodeScanMatcher_code][off]) {
                  case OP_TYPE_INTEGER_, OP_TYPE_FUNCTION_, OP_TYPE_UNKNOWN_: {
                      // Parameters remaining, none expected.
                      goto CodeScanCheck_fail;
                  }
                  default: {
                      // Out of parameters to check but still looking correct.
                      cs[CodeScanMatcher_offset][idx] = off;
                      continue;
                  }
              }
          }
  CodeScanCheck_fail:
          // The opcode or a parameter is wrong - restart this partial match.
          cs[CodeScanMatcher_holeidx][idx] = cs[CodeScanMatcher_offset][idx] = 0;
      }
      next = cs[CodeScanMatcher_next];
      return false;
  }
  // Looks up the name of the public function at `addr`.
  //
  //   addr - Address of the function.
  //   name - Receives the name (at most 32 cells are requested), or an empty
  //          string when no public function has that address.
  //
  // Returns "true" when a public was found. This code will not return great
  // results for public functions with states.
  static stock bool:CodeScanGetFuncName(addr, name[]) {
      new
          index = GetPublicIndexFromAddress(addr);
      if (index >= 0) {
          GetPublicNameFromIndex(index, name, 32);
          return true;
      }
      // Name not found.
      name[0] = 0;
      return false;
  }
  // Disassembles the next instruction and updates what the scanner knows
  // about the surrounding function: where it starts, what sort of function
  // it is, and how large the stack and heap are at this point.
  //
  //   dctx       - Disassembler context; advanced by one instruction.
  //   csState    - The scanner to update.
  //   parseState - Small state machine carried between calls:
  //                  4 - nothing seen yet (before the first function),
  //                  3 - the previous instruction was "PUSH.C",
  //                  1 - the previous instruction was "LCTRL 5",
  //                  2 - "LCTRL 5" then "ADD.C" were just seen,
  //                  0 - none of the above.
  //   parseParam - Operand remembered alongside `parseState`.
  //
  // A size of `cellmin` means "no longer known". Returns "false" only when
  // the disassembler reports that it is done.
  static stock bool:CodeScanStepInternal(dctx[DisasmContext], csState[CodeScanner], &parseState, &parseParam) {
      switch (DisasmNext(dctx)) {
          case DISASM_DONE: {
              return false;
          }
          case DISASM_NOP: {
              parseState = 0;
              return true;
          }
          case DISASM_OK: {
              // A real instruction - handled below.
          }
          default: {
              // Any other result is skipped without touching the state.
              return true;
          }
      }
      new
          stackSize = csState[CodeScanMatch_stack],
          heapSize = csState[CodeScanMatch_heap],
          here = DisasmGetCurIp(dctx),
          Opcode:opcode = DisasmGetOpcode(dctx);
      // If an earlier jump lands here, adopt the sizes recorded at the jump
      // BEFORE applying the effect of this instruction.
      CodeScanCheckJumpTarget(here, here + gBase, stackSize, heapSize, csState);
      switch (opcode) {
          case OP_HALT: {
              // Only special when it is the very first instruction seen.
              if (parseState == 4) {
                  csState[CodeScanMatch_type] = SCANNER_FUNC_HALT_NO_NAME;
                  csState[CodeScanMatch_func] = here;
                  stackSize = heapSize = 0;
                  CodeScanResetJumpTargets(csState);
              }
          }
          case OP_PROC: {
              // This is the start of a new function. The only functions
              // that don't start like this are the automata stubs.
              csState[CodeScanMatch_type] = SCANNER_FUNC_UNKNOWN;
              csState[CodeScanMatch_func] = here;
              CodeScanResetJumpTargets(csState);
              stackSize = heapSize = parseState = 0;
          }
          case OP_LOAD_PRI: {
              // If we are not in the main functions yet and this is the
              // first instruction seen, then it is the start of an automata
              // function stub.
              if (parseState == 4) {
                  csState[CodeScanMatch_type] = SCANNER_FUNC_AUTOMATA_NO_NAME;
                  csState[CodeScanMatch_func] = here;
                  stackSize = heapSize = 0;
                  CodeScanResetJumpTargets(csState);
              }
          }
          case OP_PUSH_PRI, OP_PUSH_ALT, OP_PUSH_R, OP_PUSH_S, OP_PUSH, OP_PUSH_ADR: {
              if (stackSize != cellmin) {
                  stackSize += cellbytes;
              }
              parseState = 0;
          }
          case OP_STACK: {
              // The stack grows down, but our count is positive.
              if (stackSize != cellmin) {
                  stackSize -= DisasmGetOperand(dctx);
              }
              parseState = 0;
          }
          case OP_HEAP: {
              if (heapSize != cellmin) {
                  heapSize += DisasmGetOperand(dctx);
              }
              parseState = 0;
          }
          case OP_POP_PRI, OP_POP_ALT: {
              if (stackSize != cellmin) {
                  stackSize -= cellbytes;
              }
              parseState = 0;
          }
          case OP_CALL, OP_CALL_PRI: {
              // Remove all the function parameters, whose size came from
              // the "PUSH.C" immediately before the call.
              if (parseState == 3) {
                  stackSize -= parseParam;
              }
              parseState = 0;
          }
          case OP_PUSH_C: {
              // The "+ cellbytes" is because when calling a function, the
              // parameter is the number of bytes pushed, not including
              // this one, with that one implicitly popped on return.
              parseParam = DisasmGetOperand(dctx) + cellbytes;
              if (stackSize != cellmin) {
                  stackSize += cellbytes;
                  parseState = 3;
              }
          }
          // There is a code-gen pattern of:
          //
          //     LCTRL 5
          //     ADD.C n
          //     SCTRL 4
          //
          // Which adjusts the stack to the correct size after "goto". We
          // have to deal with that explicitly. Note that the "ADD.C" may be
          // missing if there are no variables currently in scope.
          case OP_LCTRL: {
              if (DisasmGetOperand(dctx) == 5) {
                  parseParam = 0;
                  parseState = 1;
              } else {
                  parseState = 0;
              }
          }
          case OP_ADD_C: {
              if (parseState == 1) {
                  parseParam = -DisasmGetOperand(dctx);
                  parseState = 2;
              } else {
                  parseState = 0;
              }
          }
          case OP_SCTRL: {
              // This is the tricky one, since it can mess up the stack in
              // strange ways. Deal with the case where it comes from
              // "goto", even though that is generally considered bad.
              switch (DisasmGetOperand(dctx)) {
                  case 2: {
                      heapSize = cellmin;
                  }
                  case 4: {
                      switch (parseState) {
                          case 1: {
                              // "LCTRL 5" directly before: no locals.
                              stackSize = 0;
                          }
                          case 2: {
                              // "LCTRL 5", "ADD.C n" before: n bytes.
                              stackSize = parseParam;
                          }
                          default: {
                              stackSize = cellmin;
                          }
                      }
                  }
                  case 5: {
                      stackSize = cellmin;
                  }
              }
              parseState = 0;
          }
          case OP_JUMP, OP_JZER, OP_JNZ, OP_JEQ, OP_JNEQ, OP_JLESS, OP_JLEQ, OP_JGRTR, OP_JGEQ, OP_JSLESS, OP_JSLEQ, OP_JSGRTR, OP_JSGEQ: {
              // Add a jump target. These require relocation as they are
              // translated to absolute RAM locations. Relocate relative to
              // "dat" not "cod" for simpler comparisons:
              //
              //     val = val - (base + cod) + (cod - dat);
              //     val = val - (base + dat);
              //
              // and "gBase" is already "base + dat". Only jumps that go
              // forwards are recorded.
              parseParam = DisasmGetOperand(dctx) - gBase;
              parseState = 0;
              if (parseParam > here) {
                  CodeScanAddJumpTarget(parseParam, stackSize, heapSize, csState);
              }
          }
          case OP_JREL: {
              // Add a jump target. Only jumps that go forwards.
              parseParam = DisasmGetOperand(dctx) + here;
              parseState = 0;
              if (parseParam > here) {
                  CodeScanAddJumpTarget(parseParam, stackSize, heapSize, csState);
              }
          }
          case OP_SWITCH: {
              // Add a jump target. These are always forwards.
              CodeScanAddSwitchTarget(dctx, stackSize, heapSize, csState);
              parseState = 0;
          }
          default: {
              parseState = 0;
          }
      }
      csState[CodeScanMatch_stack] = stackSize;
      csState[CodeScanMatch_heap] = heapSize;
      return true;
  }
  // Public single-step entry point: advances `dctx` by one instruction and
  // updates `csState`, keeping the parser state inside the scanner itself
  // (`CodeScanner_state` / `CodeScanner_param`) between calls. Returns
  // "false" once the disassembler is done.
  stock bool:CodeScanStep(dctx[DisasmContext], csState[CodeScanner]) {
      new
          bool:more = CodeScanStepInternal(dctx, csState, csState[CodeScanner_state], csState[CodeScanner_param]);
      return more;
  }
  // Calls a matcher's callback (`CodeScanMatcher_func`, a code address) with
  // `csState` as the first argument and, when `SCANNER_HAS_USER_DATA` is set,
  // the stored user data as the second. Returns the value left in `func` by
  // the hand-written call sequence - presumably the callback's return value,
  // which `CodeScanRun` compares against -1 and 0.
  // The call sequence is position-sensitive: do not add, remove or reorder
  // any `#emit` lines without recalculating the constant in "ADD.C".
  696. static stock CodeScanCall(cs[CodeScanMatcher], csState[CodeScanner]) {
  697. // If I wrote way more assembly I could get away with not calling
  698. // `CodeScanDeref(cur)` below, and not need to assign `param` to a variable
  699. // before pushing it. But I'm not going to - it isn't worth the effort.
  700. new
  701. func = cs[CodeScanMatcher_func];
  702. if (cs[CodeScanMatcher_flags] & SCANNER_HAS_USER_DATA) {
  703. new
  704. param = cs[CodeScanMatcher_user_data];
  705. #emit PUSH.S param // Second argument: the user data.
  706. #emit PUSH.S csState // First argument: the scanner array.
  707. #emit PUSH.C 8 // Size of the arguments (two cells; assumes 4-byte cells).
  // NOTE(review): the next four instructions appear to build and push the
  // return address by hand (a control register plus a fixed byte offset);
  // the meaning of control registers 6 and 8 is not shown in this file.
  708. #emit LCTRL 6
  709. #emit ADD.C 36
  710. #emit LCTRL 8
  711. #emit PUSH.pri
  712. #emit LOAD.S.pri func // The callback address...
  713. #emit SCTRL 6 // ...presumably jumped to by writing it to register 6.
  714. #emit STOR.S.pri func // Reuse `func` to hold the result.
  715. } else {
  716. #emit PUSH.S csState // Only argument: the scanner array.
  717. #emit PUSH.C 4 // Size of the arguments (one cell; assumes 4-byte cells).
  // Same hand-built call as in the branch above.
  718. #emit LCTRL 6
  719. #emit ADD.C 36
  720. #emit LCTRL 8
  721. #emit PUSH.pri
  722. #emit LOAD.S.pri func
  723. #emit SCTRL 6
  724. #emit STOR.S.pri func
  725. }
  726. return func;
  727. }
  // Runs every matcher attached to `csState` over the code in parallel.
  //
  // For each instruction, the matchers are tried in list order. When one
  // completes its pattern, its callback is called and the result decides
  // what happens next:
  //
  //   -1    - Ignore this match: reset just this matcher and keep trying the
  //           remaining matchers on the same instruction.
  //    0    - Reset every matcher and move on to the next instruction.
  //   other - Code was written: reset every matcher and restart the
  //           disassembly from the beginning of the current function.
  //
  // Always returns "true".
  stock bool:CodeScanRun(csState[CodeScanner]) {
      if (csState[CodeScanner_first] == -1) {
          // No matchers, nothing to do.
          return true;
      }
      new
          dctx[DisasmContext],
          node,
          Opcode:opcode,
          parseState = 4,
          parseParam;
      DisasmInit(dctx);
      // Start every matcher from a clean state. The reset call advances
      // "node" to the next matcher by itself.
      node = csState[CodeScanner_first];
      while (node != -1) {
          CodeScanReset(CodeScanDeref(node), node);
      }
      while (CodeScanStepInternal(dctx, csState, parseState, parseParam)) {
          node = csState[CodeScanner_first];
          opcode = DisasmGetOpcode(dctx);
          while (node != -1) {
              if (!CodeScanCheck(opcode, dctx, CodeScanDeref(node), csState, node)) {
                  // No full match; "node" is already the next matcher.
                  continue;
              }
              new
                  verdict = CodeScanCall(CodeScanDeref(node), csState);
              if (verdict == -1) {
                  // Want to skip this match. However, it was a full match
                  // so does need resetting.
                  CodeScanReset(CodeScanDeref(node), node);
                  continue;
              }
              if (verdict != 0) {
                  // If code was written, reparse this function.
                  dctx[DisasmContext_nip] = csState[CodeScanMatch_func];
              }
              // All partial matches are now stale.
              node = csState[CodeScanner_first];
              while (node != -1) {
                  CodeScanReset(CodeScanDeref(node), node);
              }
              break;
          }
      }
      return true;
  }
  771. stock CodeScanInit(scanner[CodeScanner]) {
  772. // I debated inlining DisasmInit to avoid two calls to "GetAmxHeader", but
  773. // it isn't worth the effort and code duplication. No "start" and "end"
  774. // parameters, so scans the entire code range.
  775. GetAmxHeader(gHdr),
  776. gBase = GetAmxBaseAddress() + gHdr[AMX_HDR_DAT],
  777. gDat = gHdr[AMX_HDR_COD] - gHdr[AMX_HDR_DAT],
  778. CodeScanResetJumpTargets(scanner),
  779. scanner[CodeScanMatch_type] =
  780. scanner[CodeScanMatch_name] =
  781. scanner[CodeScanner_param] =
  782. scanner[CodeScanner_state] =
  783. scanner[CodeScanMatch_heap] =
  784. scanner[CodeScanMatch_stack] = 0,
  785. scanner[CodeScanMatch_params] = cellmin,
  786. scanner[CodeScanner_first] = -1;
  787. }
  788. stock CodeScanGetFunctionScanner(csm[CodeScanner], ret[CodeScanner], ctx[DisasmContext]) {
  789. // Doesn't do any decompilation, just gets the information for decompiling
  790. // the whole of the current function.
  791. CodeScanInit(ret),
  792. ctx[DisasmContext_end_ip] = 0,
  793. ctx[DisasmContext_start_ip] = ctx[DisasmContext_nip] = ctx[DisasmContext_cip] = csm[CodeScanMatch_func];
  794. switch (csm[CodeScanMatch_type]) {
  795. case 0, SCANNER_FUNC_AUTOMATA, SCANNER_FUNC_HALT, SCANNER_FUNC_AUTOMATA_NO_NAME, SCANNER_FUNC_HALT_NO_NAME: {
  796. ret[CodeScanner_state] = 4;
  797. }
  798. default: {
  799. ret[CodeScanner_state] = 0;
  800. }
  801. }
  802. }
  803. stock CodeScanGetMatchScanner(csm[CodeScanner], ret[CodeScanner], ctx[DisasmContext], bool:accurate = false) {
  804. // Doesn't do any decompilation, just gets the information for decompiling
  805. // the currently found match.
  806. CodeScanGetFunctionScanner(csm, ret, ctx);
  807. if (accurate) {
  808. // To be accurate in terms of jump targets, we re-run the scanner over
  809. // the function back up to this point.
  810. while (ctx[DisasmContext_nip] < csm[CodeScanMatch_cip]) {
  811. CodeScanStepInternal(ctx, ret, ret[CodeScanner_state], ret[CodeScanner_param]);
  812. }
  813. } else {
  814. // For speed, we just change the current instruction pointers.
  815. ctx[DisasmContext_start_ip] = ctx[DisasmContext_nip] = ctx[DisasmContext_cip] = csm[CodeScanMatch_cip];
  816. }
  817. }
  818. stock CodeScanGetFunctionDisasm(csm[CodeScanner], ctx[DisasmContext], offset = 0) {
  819. // Doesn't do any decompilation, just gets the information for decompiling
  820. // the whole of the current function.
  821. ctx[DisasmContext_end_ip] = 0,
  822. ctx[DisasmContext_start_ip] = ctx[DisasmContext_nip] = ctx[DisasmContext_cip] = csm[CodeScanMatch_func] + offset;
  823. }
  824. stock CodeScanGetMatchDisasm(csm[CodeScanner], ctx[DisasmContext], offset = 0) {
  825. // Doesn't do any decompilation, just gets the information for decompiling
  826. // the currently found match.
  827. ctx[DisasmContext_end_ip] = 0,
  828. ctx[DisasmContext_start_ip] = ctx[DisasmContext_nip] = ctx[DisasmContext_cip] = csm[CodeScanMatch_cip] + offset;
  829. }
  830. stock CodeScanGetFunctionAsm(csm[CodeScanner], ctx[AsmContext], offset = 0) {
  831. // Doesn't do any decompilation, just gets the information for writing to
  832. // the whole of the current function.
  833. AsmInitPtr(ctx, csm[CodeScanMatch_func] + offset, cellmax);
  834. }
  835. stock CodeScanGetMatchAsm(csm[CodeScanner], ctx[AsmContext], offset = 0) {
  836. // Doesn't do any decompilation, just gets the information for writing to
  837. // the currently found match.
  838. AsmInitPtr(ctx, csm[CodeScanMatch_cip] + offset, cellmax);
  839. }
  840. stock CodeScanGetMatchFunc(csm[CodeScanner]) {
  841. // The stored value is relative to "DAT", return relative to "COD".
  842. return csm[CodeScanMatch_func] - gDat;
  843. }
  844. stock CodeScanGetMatchAddress(csm[CodeScanner]) {
  845. // The stored value is relative to "DAT", return relative to "COD".
  846. return csm[CodeScanMatch_cip] - gDat;
  847. }
  848. stock CodeScanGetMatchFuncData(csm[CodeScanner]) {
  849. // Return relative to "DAT".
  850. return csm[CodeScanMatch_func];
  851. }
  852. stock CodeScanGetMatchAddressData(csm[CodeScanner]) {
  853. // Return relative to "DAT".
  854. return csm[CodeScanMatch_cip];
  855. }
  856. stock CodeScanGetMatchLength(csm[CodeScanner]) {
  857. return csm[CodeScanMatch_size];
  858. }
  859. stock CodeScanGetMatchType(csm[CodeScanner]) {
  860. // Lazilly get the names and types of functions when requested.
  861. if (csm[CodeScanMatch_type] >= SCANNER_FUNC_UNKNOWN) {
  862. csm[CodeScanMatch_name][0] = '\0';
  863. if (CodeScanGetFuncName(csm[CodeScanMatch_func], csm[CodeScanMatch_name])) {
  864. csm[CodeScanMatch_type] -= 4;
  865. } else {
  866. csm[CodeScanMatch_type] /= 2;
  867. // We could check for functions that are state implementations.
  868. // Currently public functions with states will only get their names
  869. // for the state stub, not for the various implementations.
  870. }
  871. }
  872. // There are four types:
  873. //
  874. // PUBLIC - Public functions.
  875. // HALT - The "halt" instructions at the very start.
  876. // AUTOMATA - A state determining stub.
  877. // OTHER - A normal function.
  878. //
  879. // These names are always prefixed by "SCANNER_FUNC_", and only "PUBLIC" is
  880. // guaranteed to have a name - the types are partially determined in other
  881. // ways ("OTHER" will never have a name).
  882. //
  883. // There is also "0", which just means that nothing has been scanned yet.
  884. return csm[CodeScanMatch_type];
  885. }
  886. stock CodeScanGetMatchHeap(csm[CodeScanner]) {
  887. return csm[CodeScanMatch_heap];
  888. }
  889. stock CodeScanGetMatchStack(csm[CodeScanner]) {
  890. return csm[CodeScanMatch_stack];
  891. }
  892. stock CodeScanGetMatchHole(csm[CodeScanner], idx) {
  893. return csm[CodeScanMatch_holes][idx];
  894. }
  895. stock CodeScanGetMatchName(csm[CodeScanner], name[]) {
  896. if (csm[CodeScanMatch_type] >= SCANNER_FUNC_UNKNOWN) {
  897. // We get the type, because the type is based on the name.
  898. CodeScanGetMatchType(csm);
  899. }
  900. name[0] = '\0',
  901. strcat(name, csm[CodeScanMatch_name], 32);
  902. }