diff --git a/Ghidra/Features/Base/certification.manifest b/Ghidra/Features/Base/certification.manifest index 822aa82c5a..55fb461fc3 100644 --- a/Ghidra/Features/Base/certification.manifest +++ b/Ghidra/Features/Base/certification.manifest @@ -72,6 +72,8 @@ data/typeinfo/win32/msvcrt/iids.txt||GHIDRA||reviewed||END| data/typeinfo/win32/msvcrt/syntaxes.txt||GHIDRA||||END| data/typeinfo/win32/windows_vs12_32.gdt||GHIDRA||||END| data/typeinfo/win32/windows_vs12_64.gdt||GHIDRA||||END| +data/x64_linux_syscall_numbers||GHIDRA||||END| +data/x86_linux_syscall_numbers||GHIDRA||||END| ghidra_scripts/AskScript.properties||GHIDRA||||END| ghidra_scripts/RecursiveStringFinder.py||GHIDRA||||END| ghidra_scripts/mark_in_out.py||GHIDRA||reviewed||END| diff --git a/Ghidra/Features/Base/data/x64_linux_syscall_numbers b/Ghidra/Features/Base/data/x64_linux_syscall_numbers new file mode 100644 index 0000000000..d06363c84d --- /dev/null +++ b/Ghidra/Features/Base/data/x64_linux_syscall_numbers @@ -0,0 +1,322 @@ +#format = number(decimal) syscall_name +0 read +1 write +2 open +3 close +4 stat +5 fstat +6 lstat +7 poll +8 lseek +9 mmap +10 mprotect +11 munmap +12 sys_brk +13 rt_sigaction +14 rt_sigprocmask +16 ioctl +17 pread64 +18 pwrite64 +19 readv +20 writev +21 access +22 pipe +23 select +24 sched_yield +25 mremap +26 msync +27 mincore +28 madvise +29 shmget +30 shmat +31 shmctl +32 dup +33 dup2 +34 pause +35 nanosleep +36 getitimer +37 alarm +38 setitimer +39 getpid +40 sendfile +41 socket +42 connect +43 accept +44 sendto +45 recvfrom +46 sendmsg +47 recvmsg +48 shutdown +49 bind +50 listen +51 getsockname +52 getpeername +53 socketpair +54 setsockopt +55 getsockopt +56 clone +57 fork +58 vfork +59 execve +60 exit +61 wait4 +62 kill +63 uname +64 semget +65 semop +66 semctl +67 shmdt +68 msgget +69 msgsnd +70 msgrcv +71 msgctl +72 fcntl +73 flock +74 fsync +75 fdatasync +76 truncate +77 ftruncate +78 getdents +79 getcwd +80 chdir +81 fchdir +82 rename +83 mkdir +84 rmdir +85 creat +86 
link +87 unlink +88 symlink +89 readlink +90 chmod +91 fchmod +92 chown +93 fchown +94 lchown +95 umask +96 gettimeofday +97 getrlimit +98 getrusage +99 sysinfo +100 times +101 ptrace +102 getuid +103 syslog +104 getgid +105 setuid +106 setgid +107 geteuid +108 getegid +109 setpgid +110 getppid +111 getpgrp +112 setsid +113 setreuid +114 setregid +115 getgroups +116 setgroups +117 setresuid +118 getresuid +119 setresgid +120 getresgid +121 getpgid +122 setfsuid +123 setfsgid +124 getsid +125 capget +126 capset +127 rt_sigpending +128 rt_sigtimedwait +129 rt_sigqueueinfo +130 rt_sigsuspend +131 sigaltstack +132 utime +133 mknod +134 uselib +135 personality +136 ustat +137 statfs +138 fstatfs +139 sysfs +140 getpriority +141 setpriority +142 sched_setparam +143 sched_getparam +144 sched_setscheduler +145 sched_getscheduler +146 sched_get_priority_max +147 sched_get_priority_min +148 sched_rr_get_interval +149 mlock +150 munlock +151 mlockall +152 munlockall +153 vhangup +154 modify_ldt +155 pivot_root +156 _sysctl +157 prctl +158 arch_prctl +159 adjtimex +160 setrlimit +161 chroot +162 sync +163 acct +164 settimeofday +165 mount +166 umount2 +167 swapon +168 swapoff +169 reboot +170 sethostname +171 setdomainname +172 iopl +173 ioperm +174 create_module +175 init_module +176 delete_module +177 get_kernel_syms +178 query_module +179 quotactl +180 nfsservctl +181 getpmsg +182 putpmsg +183 afs_syscall +184 tuxcall +185 security +186 gettid +187 readahead +188 setxattr +189 lsetxattr +190 fsetxattr +191 getxattr +192 lgetxattr +193 fgetxattr +194 listxattr +195 llistxattr +196 flistxattr +197 removexattr +198 lremovexattr +199 fremovexattr +200 tkill +201 time +202 futex +203 sched_setaffinity +204 sched_getaffinity +206 io_setup +207 io_destroy +208 io_getevents +209 io_submit +210 io_cancel +213 epoll_create +216 remap_file_pages +217 getdents64 +218 set_tid_address +219 restart_syscall +220 semtimedop +221 fadvise64 +222 timer_create +223 timer_settime +224 
timer_gettime +225 timer_getoverrun +226 timer_delete +227 clock_settime +228 clock_gettime +229 clock_getres +230 clock_nanosleep +231 exit_group +232 epoll_wait +233 epoll_ctl +234 tgkill +235 utimes +237 mbind +238 set_mempolicy +239 get_mempolicy +240 mq_open +241 mq_unlink +242 mq_timedsend +243 mq_timedreceive +244 mq_notify +245 mq_getsetattr +246 kexec_load +247 waitid +248 add_key +249 request_key +250 keyctl +251 ioprio_set +252 ioprio_get +253 inotify_init +254 inotify_add_watch +255 inotify_rm_watch +256 migrate_pages +257 openat +258 mkdirat +259 mknodat +260 fchownat +261 futimesat +262 newfstatat +263 unlinkat +264 renameat +265 linkat +266 symlinkat +267 readlinkat +268 fchmodat +269 faccessat +270 pselect6 +271 ppoll +272 unshare +273 set_robust_list +274 get_robust_list +275 splice +276 tee +277 sync_file_range +278 vmsplice +279 move_pages +280 utimensat +281 epoll_pwait +282 signalfd +283 timerfd_create +284 eventfd +285 fallocate +286 timerfd_settime +287 timerfd_gettime +288 accept4 +289 signalfd4 +290 eventfd2 +291 epoll_create1 +292 dup3 +293 pipe2 +294 inotify_init1 +295 preadv +296 pwritev +297 rt_tgsigqueueinfo +298 perf_event_open +299 recvmmsg +300 fanotify_init +301 fanotify_mark +302 prlimit64 +303 name_to_handle_at +304 open_by_handle_at +305 clock_adjtime +306 syncfs +307 sendmmsg +308 setns +309 getcpu +310 process_vm_readv +311 process_vm_writev +312 kcmp +313 finit_module +314 sched_setattr +315 sched_getattr +317 seccomp +318 getrandom +319 memfd_create +320 kexec_file_load +321 bpf +323 userfaultfd +324 membarrier +325 mlock2 +326 copy_file_range +329 pkey_mprotect +330 pkey_alloc +331 pkey_free diff --git a/Ghidra/Features/Base/data/x86_linux_syscall_numbers b/Ghidra/Features/Base/data/x86_linux_syscall_numbers new file mode 100644 index 0000000000..7b83ea1da0 --- /dev/null +++ b/Ghidra/Features/Base/data/x86_linux_syscall_numbers @@ -0,0 +1,192 @@ +#format = number(decimal) syscall_name +00 setup +01 exit +02 fork +03 read 
+04 write +05 open +06 close +07 waitpid +08 creat +09 link +10 unlink +11 execve +12 chdir +13 time +14 mknod +15 chmod +16 lchown +17 break +18 oldstat +19 lseek +20 getpid +21 mount +22 umount +23 setuid +24 getuid +25 stime +26 ptrace +27 alarm +28 oldfstat +29 pause +30 utime +31 stty +32 gtty +33 access +34 nice +35 ftime +36 sync +37 kill +38 rename +39 mkdir +40 rmdir +41 dup +42 pipe +43 times +44 prof +45 brk +46 setgid +47 getgid +48 signal +49 geteuid +50 getegid +51 acct +52 umount2 +53 lock +54 ioctl +55 fcntl +56 mpx +57 setpgid +58 ulimit +59 oldolduname +60 umask +61 chroot +62 ustat +63 dup2 +64 getppid +65 getpgrp +66 setsid +67 sigaction +68 sgetmask +69 ssetmask +70 setreuid +71 setregid +72 sigsuspend +73 sigpending +74 sethostname +75 setrlimit +76 getrlimit +77 getrusage +78 gettimeofday +79 settimeofday +80 getgroups +81 setgroups +82 select +83 symlink +84 oldlstat +85 readlink +86 uselib +87 swapon +88 reboot +89 readdir +90 mmap +91 munmap +92 truncate +93 ftruncate +94 fchmod +95 fchown +96 getpriority +97 setpriority +98 profil +99 statfs +100 fstatfs +101 ioperm +102 socketcall +103 syslog +104 setitimer +105 getitimer +106 stat +107 lstat +108 fstat +109 olduname +110 iopl +111 vhangup +112 idle +113 vm86old +114 wait4 +115 swapoff +116 sysinfo +117 ipc +118 fsync +119 sigreturn +120 clone +121 setdomainname +122 uname +123 modify_ldt +124 adjtimex +125 mprotect +126 sigprocmask +127 create_module +128 init_module +129 delete_module +130 get_kernel_syms +131 quotactl +132 getpgid +133 fchdir +134 bdflush +135 sysfs +136 personality +137 afs_syscall +138 setfsuid +139 setfsgid +140 _llseek +141 getdents +142 _newselect +143 flock +144 msync +145 readv +146 writev +147 getsid +148 fdatasync +149 _sysctl +150 mlock +151 munlock +152 mlockall +153 munlockall +154 sched_setparam +155 sched_getparam +156 sched_setscheduler +157 sched_getscheduler +158 sched_yield +159 sched_get_priority_max +160 sched_get_priority_min +161 
sched_rr_get_interval +162 nanosleep +163 mremap +164 setresuid +165 getresuid +166 vm86 +167 query_module +168 poll +169 nfsservctl +170 setresgid +171 getresgid +172 prctl +173 rt_sigreturn +174 rt_sigaction +175 rt_sigprocmask +176 rt_sigpending +177 rt_sigtimedwait +178 rt_sigqueueinfo +179 rt_sigsuspend +180 pread +181 pwrite +182 chown +183 getcwd +184 capget +185 capset +186 sigaltstack +187 sendfile +188 getpmsg +189 putpmsg +190 vfork diff --git a/Ghidra/Features/Base/ghidra_scripts/ResolveX86orX64LinuxSyscallsScript.java b/Ghidra/Features/Base/ghidra_scripts/ResolveX86orX64LinuxSyscallsScript.java new file mode 100644 index 0000000000..bd5010b97d --- /dev/null +++ b/Ghidra/Features/Base/ghidra_scripts/ResolveX86orX64LinuxSyscallsScript.java @@ -0,0 +1,324 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +//Uses overriding references and the symbolic propogator to resolve system calls +//@category Analysis +import java.io.*; +import java.util.*; +import java.util.Map.Entry; +import java.util.function.Predicate; + +import generic.jar.ResourceFile; +import ghidra.app.cmd.function.ApplyFunctionDataTypesCmd; +import ghidra.app.cmd.memory.AddUninitializedMemoryBlockCmd; +import ghidra.app.plugin.core.analysis.AutoAnalysisManager; +import ghidra.app.plugin.core.analysis.ConstantPropagationContextEvaluator; +import ghidra.app.script.GhidraScript; +import ghidra.app.services.DataTypeManagerService; +import ghidra.app.util.opinion.ElfLoader; +import ghidra.framework.Application; +import ghidra.program.model.address.*; +import ghidra.program.model.data.DataTypeManager; +import ghidra.program.model.lang.BasicCompilerSpec; +import ghidra.program.model.lang.Register; +import ghidra.program.model.listing.*; +import ghidra.program.model.mem.MemoryAccessException; +import ghidra.program.model.pcode.PcodeOp; +import ghidra.program.model.symbol.*; +import ghidra.program.util.ContextEvaluator; +import ghidra.program.util.SymbolicPropogator; +import ghidra.program.util.SymbolicPropogator.Value; +import ghidra.util.Msg; +import ghidra.util.exception.CancelledException; +import ghidra.util.task.TaskMonitor; + +/** + * This script will resolve system calls for x86 or x64 Linux binaries. + * It assumes that in the x64 case, the syscall native instruction is used to make system calls, + * and in the x86 case, system calls are made via an indirect call to GS:[0x10]. + * It should be straightforward to modify this script for other cases. 
+ */ +public class ResolveX86orX64LinuxSyscallsScript extends GhidraScript { + + //disassembles to "CALL dword ptr GS:[0x10]" + private static final byte[] x86_bytes = { 0x65, -1, 0x15, 0x10, 0x00, 0x00, 0x00 }; + + private static final String X86 = "x86"; + + private static final String SYSCALL_SPACE_NAME = "syscall"; + + private static final int SYSCALL_SPACE_LENGTH = 0x10000; + + //this is the name of the userop (aka CALLOTHER) in the pcode translation of the + //native "syscall" instruction + private static final String SYSCALL_X64_CALLOTHER = "syscall"; + + //tests whether an instruction is making a system call + private Predicate tester; + + //register holding the syscall number + private String syscallRegister; + + //datatype archive containing signature of system calls + private String datatypeArchiveName; + + //file containing map from syscall numbers to syscall names + //note that different architectures can have different system call numbers, even + //if they're both Linux... 
+ private String syscallFileName; + + //the type of overriding reference to apply + private RefType overrideType; + + //the calling convention to use for system calls (must be defined in the appropriate .cspec file) + private String callingConvention; + + @Override + protected void run() throws Exception { + + if (!(currentProgram.getExecutableFormat().equals(ElfLoader.ELF_NAME) && + currentProgram.getLanguage().getProcessor().toString().equals(X86))) { + popup("This script is intended for x86 or x64 Linux files"); + return; + } + + //determine whether the executable is 32 or 64 bit and set fields appropriately + int size = currentProgram.getLanguage().getLanguageDescription().getSize(); + if (size == 64) { + tester = ResolveX86orX64LinuxSyscallsScript::checkX64Instruction; + syscallRegister = "RAX"; + datatypeArchiveName = "generic_clib_64"; + syscallFileName = "x64_linux_syscall_numbers"; + overrideType = RefType.CALLOTHER_OVERRIDE_CALL; + callingConvention = "syscall"; + } + else { + tester = ResolveX86orX64LinuxSyscallsScript::checkX86Instruction; + syscallRegister = "EAX"; + datatypeArchiveName = "generic_clib"; + syscallFileName = "x86_linux_syscall_numbers"; + overrideType = RefType.CALL_OVERRIDE_UNCONDITIONAL; + callingConvention = "syscall"; + } + + //get the space where the system calls live. + //If it doesn't exist, create it. + AddressSpace syscallSpace = + currentProgram.getAddressFactory().getAddressSpace(SYSCALL_SPACE_NAME); + if (syscallSpace == null) { + //don't muck with address spaces if you don't have exclusive access to the program. 
+ if (!currentProgram.hasExclusiveAccess()) { + popup("Must have exclusive access to " + currentProgram.getName() + + " to run this script"); + return; + } + Address startAddr = currentProgram.getAddressFactory().getAddressSpace( + BasicCompilerSpec.OTHER_SPACE_NAME).getAddress(0x0L); + AddUninitializedMemoryBlockCmd cmd = new AddUninitializedMemoryBlockCmd( + SYSCALL_SPACE_NAME, null, this.getClass().getName(), startAddr, + SYSCALL_SPACE_LENGTH, true, true, true, false, true); + if (!cmd.applyTo(currentProgram)) { + popup("Failed to create " + SYSCALL_SPACE_NAME); + return; + } + syscallSpace = currentProgram.getAddressFactory().getAddressSpace(SYSCALL_SPACE_NAME); + } + else { + printf("AddressSpace %s found, continuing...\n", SYSCALL_SPACE_NAME); + } + + //get all of the functions that contain system calls + //note that this will not find system call instructions that are not in defined functions + Map> funcsToCalls = getSyscallsInFunctions(currentProgram, monitor); + + if (funcsToCalls.isEmpty()) { + popup("No system calls found (within defined functions)"); + return; + } + + //get the system call number at each callsite of a system call. + //note that this is not guaranteed to succeed at a given system call call site - + //it might be hard (or impossible) to determine a specific constant + Map addressesToSyscalls = + resolveConstants(funcsToCalls, currentProgram, monitor); + + if (addressesToSyscalls.isEmpty()) { + popup("Couldn't resolve any syscall constants"); + return; + } + + //get the map from system call numbers to system call names + //you might have to create this yourself! 
+ Map syscallNumbersToNames = getSyscallNumberMap(); + + //at each system call call site where a constant could be determined, create + //the system call (if not already created), then add the appropriate overriding reference + //use syscallNumbersToNames to name the created functions + //if there's not a name corresponding to the constant use a default + for (Entry entry : addressesToSyscalls.entrySet()) { + Address callSite = entry.getKey(); + Long offset = entry.getValue(); + Address callTarget = syscallSpace.getAddress(offset); + Function callee = currentProgram.getFunctionManager().getFunctionAt(callTarget); + if (callee == null) { + String funcName = "syscall_" + String.format("%08X", offset); + if (syscallNumbersToNames.get(offset) != null) { + funcName = syscallNumbersToNames.get(offset); + } + callee = createFunction(callTarget, funcName); + callee.setCallingConvention(callingConvention); + } + Reference ref = currentProgram.getReferenceManager().addMemoryReference(callSite, + callTarget, overrideType, SourceType.USER_DEFINED, Reference.MNEMONIC); + //overriding references must be primary to be active + currentProgram.getReferenceManager().setPrimary(ref, true); + } + + //finally, open the appropriate data type archive and apply its function data types + //to the new system call space, so that the system calls have the correct signatures + AutoAnalysisManager mgr = AutoAnalysisManager.getAnalysisManager(currentProgram); + DataTypeManagerService service = mgr.getDataTypeManagerService(); + List dataTypeManagers = new ArrayList<>(); + dataTypeManagers.add(service.openDataTypeArchive(datatypeArchiveName)); + dataTypeManagers.add(currentProgram.getDataTypeManager()); + ApplyFunctionDataTypesCmd cmd = new ApplyFunctionDataTypesCmd(dataTypeManagers, + new AddressSet(syscallSpace.getMinAddress(), syscallSpace.getMaxAddress()), + SourceType.USER_DEFINED, false, false); + cmd.applyTo(currentProgram); + } + + //TODO: better error checking! 
+ private Map getSyscallNumberMap() { + Map syscallMap = new HashMap<>(); + ResourceFile rFile = Application.findDataFileInAnyModule(syscallFileName); + if (rFile == null) { + popup("Error opening syscall number file, using default names"); + return syscallMap; + } + try (FileReader fReader = new FileReader(rFile.getFile(false)); + BufferedReader bReader = new BufferedReader(fReader)) { + String line = null; + while ((line = bReader.readLine()) != null) { + //lines starting with # are comments + if (!line.startsWith("#")) { + String[] parts = line.trim().split(" "); + Long number = Long.parseLong(parts[0]); + syscallMap.put(number, parts[1]); + } + } + } + catch (IOException e) { + Msg.showError(this, null, "Error reading syscall map file", e.getMessage(), e); + } + return syscallMap; + } + + /** + * Scans through all of the functions defined in {@code program} and returns + * a map which takes a function to the set of address in its body which contain + * system calls + * @param program program containing functions + * @param tMonitor monitor + * @return map function -> addresses in function containing syscalls + * @throws CancelledException if the user cancels + */ + private Map> getSyscallsInFunctions(Program program, + TaskMonitor tMonitor) throws CancelledException { + Map> funcsToCalls = new HashMap<>(); + for (Function func : program.getFunctionManager().getFunctionsNoStubs(true)) { + tMonitor.checkCanceled(); + for (Instruction inst : program.getListing().getInstructions(func.getBody(), true)) { + if (tester.test(inst)) { + Set
callSites = funcsToCalls.get(func); + if (callSites == null) { + callSites = new HashSet<>(); + funcsToCalls.put(func, callSites); + } + callSites.add(inst.getAddress()); + } + } + } + return funcsToCalls; + } + + /** + * Uses the symbolic propogator to attempt to determine the constant value in + * the syscall register at each system call instruction + * + * @param funcsToCalls map from functions containing syscalls to address in each function of + * the system call + * @param program containing the functions + * @return map from addresses of system calls to system call numbers + * @throws CancelledException if the user cancels + */ + private Map resolveConstants(Map> funcsToCalls, + Program program, TaskMonitor tMonitor) throws CancelledException { + Map addressesToSyscalls = new HashMap<>(); + Register syscallReg = program.getLanguage().getRegister(syscallRegister); + for (Function func : funcsToCalls.keySet()) { + Address start = func.getEntryPoint(); + ContextEvaluator eval = new ConstantPropagationContextEvaluator(true); + SymbolicPropogator symEval = new SymbolicPropogator(program); + symEval.flowConstants(start, func.getBody(), eval, true, tMonitor); + for (Address callSite : funcsToCalls.get(func)) { + Value val = symEval.getRegisterValue(callSite, syscallReg); + if (val == null) { + createBookmark(callSite, "System Call", + "Couldn't resolve value of " + syscallReg); + printf("Couldn't resolve value of " + syscallReg + " at " + callSite + "\n"); + continue; + } + addressesToSyscalls.put(callSite, val.getValue()); + } + } + return addressesToSyscalls; + } + + /** + * Checks whether an x86 native instruction is a system call + * @param inst instruction to check + * @return true precisely when the instruction is a system call + */ + private static boolean checkX86Instruction(Instruction inst) { + try { + return Arrays.equals(x86_bytes, inst.getBytes()); + } + catch (MemoryAccessException e) { + Msg.info(ResolveX86orX64LinuxSyscallsScript.class, + 
"MemoryAccessException at " + inst.getAddress().toString()); + return false; + } + } + + /** + * Checks whether an x64 instruction is a system call + * @param inst instruction to check + * @return true precisely when the instruction is a system call + */ + private static boolean checkX64Instruction(Instruction inst) { + boolean retVal = false; + for (PcodeOp op : inst.getPcode()) { + if (op.getOpcode() == PcodeOp.CALLOTHER) { + int index = (int) op.getInput(0).getOffset(); + if (inst.getProgram().getLanguage().getUserDefinedOpName(index).equals( + SYSCALL_X64_CALLOTHER)) { + retVal = true; + } + } + } + return retVal; + } + +} diff --git a/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec b/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec index 98f7cd44a0..9903cb2319 100644 --- a/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec +++ b/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec @@ -164,4 +164,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Ghidra/Processors/x86/data/languages/x86gcc.cspec b/Ghidra/Processors/x86/data/languages/x86gcc.cspec index eb1916af6e..73daf57473 100644 --- a/Ghidra/Processors/x86/data/languages/x86gcc.cspec +++ b/Ghidra/Processors/x86/data/languages/x86gcc.cspec @@ -246,6 +246,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/GhidraDocs/GhidraClass/Advanced/Examples/Makefile b/GhidraDocs/GhidraClass/Advanced/Examples/Makefile index 23b9d34801..346be24e32 100644 --- a/GhidraDocs/GhidraClass/Advanced/Examples/Makefile +++ b/GhidraDocs/GhidraClass/Advanced/Examples/Makefile @@ -4,7 +4,7 @@ AS=gcc OUTDIR := out -EXAMPLES := dataMutability override custom switch sharedReturn jumpWithinInstruction opaque globalRegVars.so setRegister compilerVsDecompiler noReturn createStructure animals ldiv inline +EXAMPLES := dataMutability override custom switch sharedReturn jumpWithinInstruction opaque globalRegVars.so 
setRegister compilerVsDecompiler noReturn createStructure animals ldiv inline write $(EXAMPLES): | $(OUTDIR) @@ -59,5 +59,8 @@ ldiv: ldiv.c inline: inline.s $(AS) inline.s -o $(OUTDIR)/inline +write: write.c + $(CC) write.c -o $(OUTDIR)/write -O1 + clean: rm -rf $(OUTDIR) diff --git a/GhidraDocs/GhidraClass/Advanced/Examples/write.c b/GhidraDocs/GhidraClass/Advanced/Examples/write.c new file mode 100644 index 0000000000..ff6525f5ab --- /dev/null +++ b/GhidraDocs/GhidraClass/Advanced/Examples/write.c @@ -0,0 +1,27 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +char *hello = "Hello World!\n"; + +int main(int argc, char **argv){ + asm(".intel_syntax noprefix"); + asm("mov rax,1"); + asm("mov rdi,1"); + asm("mov rsi, QWORD PTR hello[rip]"); + asm("mov rdx, 13"); + asm("syscall"); + asm(".att_syntax prefix"); + return 0; +} diff --git a/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.pdf b/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.pdf index f50410e819..f4ea829815 100644 Binary files a/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.pdf and b/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.pdf differ diff --git a/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.tex b/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.tex index 8cf48a71e3..e7dfb462b9 100755 --- a/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.tex +++ b/GhidraDocs/GhidraClass/Advanced/improvingDisassemblyAndDecompilation.tex @@ -657,7 +657,134 @@ calling convention, it assumes that the call to \textbf{adjustStack} does not ch \end{block} \end{frame} +\subsection{System Calls} +\begin{frame} +\begin{block}{System Calls} +\begin{itemize} +\item \textbf{System calls} are a way for a program to request a service from the operating system. +\item Examples include process control, file management, device management,\ldots +\item A typical implementation uses a special native instruction along with a designated register, which we'll call the +\textbf{system call register}. +\item When the special instruction is executed, the value in the system call register determines which function is called. +\end{itemize} +\end{block} +\end{frame} +\begin{frame} +\begin{block}{Exercise: System Calls} +\begin{enumerate} +\item Open and analyze the file \textbf{write}, then navigate to \textbf{main}. +\item[] Note: \textbf{main} prints \texttt{Hello World!} to the screen using the \textbf{write} system call. 
+\end{enumerate} +\begin{itemize} +\item Before going further, let's examine what we see. +\begin{itemize} +\item In the decompiler, you should see \textbf{syscall()}, which looks like a function call but isn't (try clicking on it). +\item This is an example of a \textbf{user-defined Pcode op}. +\item Such operations are used when implementing the Pcode for a particular instruction is too hard (or impossible). +\item These operations show up as \textbf{CALLOTHER} Pcode ops in the Pcode field in the Listing. They can have inputs and outputs, but otherwise are treated +as black boxes by the decompiler. +\end{itemize} +\end{itemize} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{Exercise: System Calls} +\begin{enumerate} +\setcounter{enumi}{1} +\item In the decompiler, why is the return value of \textbf{main} \texttt{undefined [16]}? +\end{enumerate} +\pause +\begin{itemize} +\item The \textbf{SYSCALL} instruction is translated to a single \textbf{CALLOTHER} Pcode op (named \textbf{syscall}). The decompiler does not consider this operation to have any +side effects, so when it tries to automatically determine the return type it sees a move to \textbf{RDX} and a move to \textbf{RAX} before the \textbf{RET} instruction. +These registers form a register pair for this architecture, so the decompiler thinks the return value is 16 bytes. +\item So how do we improve the decompilation? +\end{itemize} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{Exercise: System Calls} +\begin{itemize} +\item This system call is a call to \textbf{write} since \texttt{1} is written to the system call register (\textbf{RAX}) before the \textbf{syscall} +instruction is executed (search online for ``x64 Linux syscall table"). +\item We'd like the call to \textbf{write} to appear with the correct name, signature, and calling convention. +\item We'd also like cross references, so that we can easily see all calls to \textbf{write}. 
+\item During execution, the code for the \textbf{write} function is somewhere in the kernel and not in the program's address space. +\item So what should the call target be in Ghidra? +\item Answer: use \textbf{overlay blocks} on the \textbf{OTHER} space. +\end{itemize} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{Exercise: System Calls} +\begin{itemize} +\item Prior to Ghidra 9.1, the \textbf{OTHER} space was used to store data from a binary that does not get loaded into memory, such as the \texttt{.comment} section of an ELF file. +\item In 9.1, we've extended the ability to make references into the \textbf{OTHER} space. +\item You can't use this space directly, but you can create \textbf{overlay blocks} on the \textbf{OTHER} space. +\item Overlays are a (sort of old school) technique to allow different blocks to be swapped in and out at the same address. +\item For our purposes, they allow us to put things in an artificial memory space without the possibility of conflicting with other uses of that space. +\end{itemize} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{Exercise: System Calls} +\begin{enumerate} +\setcounter{enumi}{2} +\item Create an overlay of the \textbf{OTHER} space as follows: +\begin{enumerate}[(i)] +\item Bring up the \textbf{Memory Map} by clicking on the ram chip icon in the tool bar of the Code Browser. +\item Click on the green plus to add a block. +\item Call the block \textbf{syscall\_block}. Have it start at address \texttt{0x0} of the \textbf{OTHER} space and have length \texttt{0x1000}. +For Block Type, select \textbf{Overlay} from the drop-down menu. +\end{enumerate} +\end{enumerate} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{Exercise: System Calls} +\begin{enumerate} +\setcounter{enumi}{3} +\item Next, go to address \texttt{0x1} in \textbf{syscall\_block} and create a function (in the Listing, select both the address and the \texttt{??} and press \texttt{f}). 
+\item Edit this new function to give it the name \textbf{write} and the \textbf{syscall} calling convention. +\item If you happen to know the parameters and their types you can add them. Alternatively, select the new function \textbf{write} in the Code Browser, right-click on +\textbf{generic\_clib\_64} in the \textbf{Data Type Manager}, and select \textbf{Apply Function Data Types}. +\item[] Note: the function we've created has no body. It's essentially an address to hang a function signature and to get cross-references. +\end{enumerate} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{Exercise: System Calls} +\begin{enumerate} +\setcounter{enumi}{6} +\item Now, navigate back to the \textbf{syscall} instruction in \textbf{main}. +\item Click on the instruction in the Listing, then press \texttt{r} to bring up the \textbf{Reference Manager}. +\item Click the green plus to add a reference. Enter \textbf{syscall\_block::1} for the ``To Address'' and \textbf{CALLOTHER\_OVERRIDE\_CALL} for the Ref-Type. +This reference type essentially transforms the \textbf{CALLOTHER} Pcode op to a \textbf{CALL} op before sending the Pcode to the decompiler. The call target is the ``To Address'' +of the reference. +\item[] The decompilation should now look as expected. +\end{enumerate} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{System Call Notes} +\begin{enumerate} +\item The script \texttt{ResolveX86orX64LinuxSyscallsScript.java} will do all of this for you. You can run it on this file, but a better demonstration is to run it on a +libc shared object file. +\item The script uses the \textbf{Symbolic Propagator} to determine the value of a register at a particular location. +\item The script requires a mapping from system call numbers to system call names. The x86 and x64 ones come with Ghidra; you will need to supply others. +\item Also, the signatures of most Linux system calls are included with Ghidra (used in step 6 above). 
The script shows you how to apply function data types programmatically, +but you might have to supply your own data type archive. +\end{enumerate} +\end{block} +\end{frame} \section{Improving Decompilation: Control Flow} @@ -760,10 +887,22 @@ determine statically. \begin{block}{Exercise: Opaque Predicates} \begin{enumerate} \item Open and analyze the file \textbf{opaque}, then navigate to the function \textbf{main}. -\item \textbf{main} contains an opaque predicate. Find it and fix it with the instruction patcher by changing a conditional jump to an unconditional jump. -\item To patch an instruction, right-click on it in the Listing and select \textbf{Patch Instruction}. -\item Hint: The opaque predicate is based on the fact that if you square an integer and reduce mod 4, you can only ever get 0 or 1. Look for a multiplication, modular reduction (optimized to a bitmask), and comparison in the assembly. +\item \textbf{main} contains an opaque predicate. Find it and fix it by either: +\begin{enumerate}[(i)] +\item Changing a conditional jump to an unconditional jump using the instruction patcher. To patch an instruction, right-click on it in the Listing and select \textbf{Patch Instruction}. +\item Adding a (primary) reference with Ref-Type \textbf{JUMP\_OVERRIDE\_UNCONDITIONAL} on the appropriate conditional jump. The ``To Address'' of the reference should be the jump target. + To the decompiler, this will change the conditional jump to an unconditional jump. \end{enumerate} +\item[] (hint on next slide) +\end{enumerate} +\end{block} +\end{frame} + +\begin{frame} +\begin{block}{Exercise: Opaque Predicates} +\begin{itemize} +\item Hint: The opaque predicate is based on the fact that if you square an integer and reduce mod 4, you can only ever get 0 or 1. Look for a multiplication, modular reduction (optimized to a bitmask), and comparison in the assembly. 
+\end{itemize} \end{block} \end{frame} diff --git a/GhidraDocs/GhidraClass/ExerciseFiles/Advanced/write b/GhidraDocs/GhidraClass/ExerciseFiles/Advanced/write new file mode 100755 index 0000000000..79d9f9c2d4 Binary files /dev/null and b/GhidraDocs/GhidraClass/ExerciseFiles/Advanced/write differ diff --git a/GhidraDocs/certification.manifest b/GhidraDocs/certification.manifest index c4e36f50a2..adc1fab797 100644 --- a/GhidraDocs/certification.manifest +++ b/GhidraDocs/certification.manifest @@ -39,6 +39,7 @@ GhidraClass/ExerciseFiles/Advanced/override.so||GHIDRA||||END| GhidraClass/ExerciseFiles/Advanced/setRegister||GHIDRA||||END| GhidraClass/ExerciseFiles/Advanced/sharedReturn||GHIDRA||||END| GhidraClass/ExerciseFiles/Advanced/switch||GHIDRA||||END| +GhidraClass/ExerciseFiles/Advanced/write||GHIDRA||||END| GhidraClass/ExerciseFiles/Emulation/Source/README.txt||GHIDRA||||END| GhidraClass/ExerciseFiles/VersionTracking/WallaceSrc.exe||GHIDRA||||END| GhidraClass/ExerciseFiles/VersionTracking/WallaceVersion2.exe||GHIDRA||||END|