This commit is contained in:
LOLIPO1233PI 2025-07-21 22:02:00 +02:00 committed by GitHub
commit 0a64882d81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,5 +1,5 @@
"""
PyInstaller Extractor v2.0 (Supports pyinstaller 6.14.0, 6.13.0, 6.12.0, 6.11.1, 6.11.0, 6.10.0, 6.9.0, 6.8.0, 6.7.0, 6.6.0, 6.5.0, 6.4.0, 6.3.0, 6.2.0, 6.1.0, 6.0.0, 5.13.2, 5.13.1, 5.13.0, 5.12.0, 5.11.0, 5.10.1, 5.10.0, 5.9.0, 5.8.0, 5.7.0, 5.6.2, 5.6.1, 5.6, 5.5, 5.4.1, 5.4, 5.3, 5.2, 5.1, 5.0.1, 5.0, 4.10, 4.9, 4.8, 4.7, 4.6, 4.5.1, 4.5, 4.4, 4.3, 4.2, 4.1, 4.0, 3.6, 3.5, 3.4, 3.3, 3.2, 3.1, 3.0, 2.1, 2.0)
PyInstaller Extractor v2.0 (Supports pyinstaller 6.12.0, 6.11.1, 6.11.0, 6.10.0, 6.9.0, 6.8.0, 6.7.0, 6.6.0, 6.5.0, 6.4.0, 6.3.0, 6.2.0, 6.1.0, 6.0.0, 5.13.2, 5.13.1, 5.13.0, 5.12.0, 5.11.0, 5.10.1, 5.10.0, 5.9.0, 5.8.0, 5.7.0, 5.6.2, 5.6.1, 5.6, 5.5, 5.4.1, 5.4, 5.3, 5.2, 5.1, 5.0.1, 5.0, 4.10, 4.9, 4.8, 4.7, 4.6, 4.5.1, 4.5, 4.4, 4.3, 4.2, 4.1, 4.0, 3.6, 3.5, 3.4, 3.3, 3.2, 3.1, 3.0, 2.1, 2.0)
Author : Extreme Coders
E-mail : extremecoders(at)hotmail(dot)com
Web : https://0xec.blogspot.com
@ -81,6 +81,12 @@ Version 2.0 (March 26, 2020)
- Supports pyinstaller 3.6
- Added support for Python 3.7, 3.8
- The header of all extracted pyc's are now automatically fixed
Version 2.1 (July 21, 2025)
-------------------------------------------------
- Added proper logging
- Fixed exception handling
"""
from __future__ import print_function
@ -90,10 +96,20 @@ import marshal
import zlib
import sys
from uuid import uuid4 as uniquename
import logging
# Module-wide logger: every status message goes to the console and to
# "logs.log" in the current working directory.
# Obtain the logger through the logging registry (loggers must not be
# instantiated directly) so other code can look it up by name.
GeneralLogger = logging.getLogger("Pyinstxtractor")
GeneralLogger.setLevel(logging.INFO)
# Do not forward records to the root logger, so a message is never emitted
# twice when the embedding application configures its own root handlers.
GeneralLogger.propagate = False

LoggingFormat = logging.Formatter("[%(asctime)s] %(name)s %(message)s")
FileHandler = logging.FileHandler("logs.log")
StreamHandler = logging.StreamHandler()

StreamHandler.setFormatter(LoggingFormat)
FileHandler.setFormatter(LoggingFormat)
GeneralLogger.addHandler(StreamHandler)
GeneralLogger.addHandler(FileHandler)
class CTOCEntry:
def __init__(self, position, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, name):
def __init__(
self, position, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, name
):
self.position = position
self.cmprsdDataSize = cmprsdDataSize
self.uncmprsdDataSize = uncmprsdDataSize
@ -103,42 +119,39 @@ class CTOCEntry:
class PyInstArchive:
PYINST20_COOKIE_SIZE = 24 # For pyinstaller 2.0
PYINST21_COOKIE_SIZE = 24 + 64 # For pyinstaller 2.1+
MAGIC = b'MEI\014\013\012\013\016' # Magic number which identifies pyinstaller
PYINST20_COOKIE_SIZE = 24 # For pyinstaller 2.0
PYINST21_COOKIE_SIZE = 24 + 64 # For pyinstaller 2.1+
MAGIC = b"MEI\014\013\012\013\016" # Magic number which identifies pyinstaller
def __init__(self, path):
    """Remember the archive path and reset the extraction state.

    Args:
        path: Path of the PyInstaller-built executable to inspect.
    """
    self.filePath = path
    # No file is open yet; open() replaces this with the real handle.
    self.fPtr = None
    # Four zero bytes mean "pyc magic not discovered yet".
    self.pycMagic = b"\0" * 4
    self.barePycList = []  # List of pyc's whose headers have to be fixed
def open(self) -> bool:
    """Open the target file for binary reading and record its size.

    On success ``self.fPtr`` holds the open file object and
    ``self.fileSize`` the size in bytes reported by ``os.stat``.

    Returns:
        True if the file was opened and its size read, False if an
        I/O error occurred (the error is logged).
    """
    try:
        self.fPtr = open(self.filePath, "rb")
        self.fileSize = os.stat(self.filePath).st_size
    except IOError as e:
        # A failure to open the input is an error condition, so report it
        # with error severity rather than as an informational message.
        GeneralLogger.error(
            "[!] Error: Could not open {}, due to {}".format(self.filePath, e)
        )
        return False
    return True
def close(self):
    """Close the archive file handle, if one was opened.

    Safe to call when ``open`` failed or was never called. I/O errors
    raised while closing are ignored, because closing is best-effort
    cleanup at the end of a run.
    """
    fPtr = getattr(self, "fPtr", None)
    if fPtr is None:
        # Nothing was opened, so there is nothing to release.
        return
    try:
        fPtr.close()
    except (IOError, OSError):
        pass
def checkFile(self):
print('[+] Processing {0}'.format(self.filePath))
GeneralLogger.info("[+] Processing {0}".format(self.filePath))
searchChunkSize = 8192
endPos = self.fileSize
self.cookiePos = -1
if endPos < len(self.MAGIC):
print('[!] Error : File is too short or truncated')
GeneralLogger.info("[!] Error : File is too short or truncated")
return False
while True:
@ -163,46 +176,59 @@ class PyInstArchive:
break
if self.cookiePos == -1:
print('[!] Error : Missing cookie, unsupported pyinstaller version or not a pyinstaller archive')
GeneralLogger.info(
"[!] Error : Missing cookie, unsupported pyinstaller version or not a pyinstaller archive"
)
return False
self.fPtr.seek(self.cookiePos + self.PYINST20_COOKIE_SIZE, os.SEEK_SET)
if b'python' in self.fPtr.read(64).lower():
print('[+] Pyinstaller version: 2.1+')
self.pyinstVer = 21 # pyinstaller 2.1+
if b"python" in self.fPtr.read(64).lower():
print("[+] Pyinstaller version: 2.1+")
self.pyinstVer = 21 # pyinstaller 2.1+
else:
self.pyinstVer = 20 # pyinstaller 2.0
print('[+] Pyinstaller version: 2.0')
self.pyinstVer = 20 # pyinstaller 2.0
print("[+] Pyinstaller version: 2.0")
return True
def getCArchiveInfo(self):
try:
if self.pyinstVer == 20:
self.fPtr.seek(self.cookiePos, os.SEEK_SET)
# Read CArchive cookie
(magic, lengthofPackage, toc, tocLen, pyver) = \
struct.unpack('!8siiii', self.fPtr.read(self.PYINST20_COOKIE_SIZE))
(magic, lengthofPackage, toc, tocLen, pyver) = struct.unpack(
"!8siiii", self.fPtr.read(self.PYINST20_COOKIE_SIZE)
)
elif self.pyinstVer == 21:
self.fPtr.seek(self.cookiePos, os.SEEK_SET)
# Read CArchive cookie
(magic, lengthofPackage, toc, tocLen, pyver, pylibname) = \
struct.unpack('!8sIIii64s', self.fPtr.read(self.PYINST21_COOKIE_SIZE))
(magic, lengthofPackage, toc, tocLen, pyver, pylibname) = struct.unpack(
"!8sIIii64s", self.fPtr.read(self.PYINST21_COOKIE_SIZE)
)
except:
print('[!] Error : The file is not a pyinstaller archive')
except Exception:
GeneralLogger.info("[!] Error : The file is not a pyinstaller archive")
return False
self.pymaj, self.pymin = (pyver//100, pyver%100) if pyver >= 100 else (pyver//10, pyver%10)
print('[+] Python version: {0}.{1}'.format(self.pymaj, self.pymin))
self.pymaj, self.pymin = (
(pyver // 100, pyver % 100) if pyver >= 100 else (pyver // 10, pyver % 10)
)
GeneralLogger.info("[+] Python version: {0}.{1}".format(self.pymaj, self.pymin))
# Additional data after the cookie
tailBytes = self.fileSize - self.cookiePos - (self.PYINST20_COOKIE_SIZE if self.pyinstVer == 20 else self.PYINST21_COOKIE_SIZE)
tailBytes = (
self.fileSize
- self.cookiePos
- (
self.PYINST20_COOKIE_SIZE
if self.pyinstVer == 20
else self.PYINST21_COOKIE_SIZE
)
)
# Overlay is the data appended at the end of the PE
self.overlaySize = lengthofPackage + tailBytes
@ -210,10 +236,9 @@ class PyInstArchive:
self.tableOfContentsPos = self.overlayPos + toc
self.tableOfContentsSize = tocLen
print('[+] Length of package: {0} bytes'.format(lengthofPackage))
GeneralLogger.info("[+] Length of package: {0} bytes".format(lengthofPackage))
return True
def parseTOC(self):
# Go to the table of contents
self.fPtr.seek(self.tableOfContentsPos, os.SEEK_SET)
@ -223,56 +248,77 @@ class PyInstArchive:
# Parse table of contents
while parsedLen < self.tableOfContentsSize:
(entrySize, ) = struct.unpack('!i', self.fPtr.read(4))
nameLen = struct.calcsize('!iIIIBc')
(entrySize,) = struct.unpack("!i", self.fPtr.read(4))
nameLen = struct.calcsize("!iIIIBc")
(entryPos, cmprsdDataSize, uncmprsdDataSize, cmprsFlag, typeCmprsData, name) = \
struct.unpack( \
'!IIIBc{0}s'.format(entrySize - nameLen), \
self.fPtr.read(entrySize - 4))
(
entryPos,
cmprsdDataSize,
uncmprsdDataSize,
cmprsFlag,
typeCmprsData,
name,
) = struct.unpack(
"!IIIBc{0}s".format(entrySize - nameLen), self.fPtr.read(entrySize - 4)
)
try:
name = name.decode("utf-8").rstrip("\0")
except UnicodeDecodeError:
newName = str(uniquename())
print('[!] Warning: File name {0} contains invalid bytes. Using random name {1}'.format(name, newName))
GeneralLogger.info(
"[!] Warning: File name {0} contains invalid bytes. Using random name {1}".format(
name, newName
)
)
name = newName
# Prevent writing outside the extraction directory
if name.startswith("/"):
name = name.lstrip("/")
if len(name) == 0:
name = str(uniquename())
print('[!] Warning: Found an unamed file in CArchive. Using random name {0}'.format(name))
GeneralLogger.info(
"[!] Warning: Found an unamed file in CArchive. Using random name {0}".format(
name
)
)
self.tocList.append( \
CTOCEntry( \
self.overlayPos + entryPos, \
cmprsdDataSize, \
uncmprsdDataSize, \
cmprsFlag, \
typeCmprsData, \
name \
))
self.tocList.append(
CTOCEntry(
self.overlayPos + entryPos,
cmprsdDataSize,
uncmprsdDataSize,
cmprsFlag,
typeCmprsData,
name,
)
)
parsedLen += entrySize
print('[+] Found {0} files in CArchive'.format(len(self.tocList)))
GeneralLogger.info("[+] Found {0} files in CArchive".format(len(self.tocList)))
def _writeRawData(self, filepath, data):
    """Write ``data`` to ``filepath``, creating parent directories as needed.

    Backslashes and forward slashes in ``filepath`` are converted to the
    platform separator, and every ``..`` is replaced by ``__`` so that a
    name cannot climb out of the directory it is written into.

    Args:
        filepath: Destination path (may use either slash style).
        data: Bytes to write.
    """
    nm = (
        filepath.replace("\\", os.path.sep)
        .replace("/", os.path.sep)
        .replace("..", "__")
    )
    nmDir = os.path.dirname(nm)
    if nmDir != "":
        # exist_ok avoids the race between a separate exists() check and
        # the directory creation.
        os.makedirs(nmDir, exist_ok=True)

    with open(nm, "wb") as f:
        f.write(data)
def extractFiles(self):
print('[+] Beginning extraction...please standby')
extractionDir = os.path.join(os.getcwd(), os.path.basename(self.filePath) + '_extracted')
GeneralLogger.info("[+] Beginning extraction...please standby")
extractionDir = os.path.join(
os.getcwd(), os.path.basename(self.filePath) + "_extracted"
)
if not os.path.exists(extractionDir):
os.mkdir(extractionDir)
@ -287,127 +333,138 @@ class PyInstArchive:
try:
data = zlib.decompress(data)
except zlib.error:
print('[!] Error : Failed to decompress {0}'.format(entry.name))
GeneralLogger.info("[!] Error : Failed to decompress {0}".format(entry.name))
continue
# Malware may tamper with the uncompressed size
# Comment out the assertion in such a case
assert len(data) == entry.uncmprsdDataSize # Sanity Check
assert len(data) == entry.uncmprsdDataSize # Sanity Check
if entry.typeCmprsData == b'd' or entry.typeCmprsData == b'o':
if entry.typeCmprsData == b"d" or entry.typeCmprsData == b"o":
# d -> ARCHIVE_ITEM_DEPENDENCY
# o -> ARCHIVE_ITEM_RUNTIME_OPTION
# These are runtime options, not files
continue
basePath = os.path.dirname(entry.name)
if basePath != '':
if basePath != "":
# Check if path exists, create if not
if not os.path.exists(basePath):
os.makedirs(basePath)
if entry.typeCmprsData == b's':
if entry.typeCmprsData == b"s":
# s -> ARCHIVE_ITEM_PYSOURCE
# Entry point are expected to be python scripts
print('[+] Possible entry point: {0}.pyc'.format(entry.name))
GeneralLogger.info("[+] Possible entry point: {0}.pyc".format(entry.name))
if self.pycMagic == b'\0' * 4:
if self.pycMagic == b"\0" * 4:
# if we don't have the pyc header yet, fix them in a later pass
self.barePycList.append(entry.name + '.pyc')
self._writePyc(entry.name + '.pyc', data)
self.barePycList.append(entry.name + ".pyc")
self._writePyc(entry.name + ".pyc", data)
elif entry.typeCmprsData == b'M' or entry.typeCmprsData == b'm':
elif entry.typeCmprsData == b"M" or entry.typeCmprsData == b"m":
# M -> ARCHIVE_ITEM_PYPACKAGE
# m -> ARCHIVE_ITEM_PYMODULE
# packages and modules are pyc files with their header intact
# From PyInstaller 5.3 and above pyc headers are no longer stored
# https://github.com/pyinstaller/pyinstaller/commit/a97fdf
if data[2:4] == b'\r\n':
if data[2:4] == b"\r\n":
# < pyinstaller 5.3
if self.pycMagic == b'\0' * 4:
if self.pycMagic == b"\0" * 4:
self.pycMagic = data[0:4]
self._writeRawData(entry.name + '.pyc', data)
self._writeRawData(entry.name + ".pyc", data)
else:
# >= pyinstaller 5.3
if self.pycMagic == b'\0' * 4:
if self.pycMagic == b"\0" * 4:
# if we don't have the pyc header yet, fix them in a later pass
self.barePycList.append(entry.name + '.pyc')
self.barePycList.append(entry.name + ".pyc")
self._writePyc(entry.name + '.pyc', data)
self._writePyc(entry.name + ".pyc", data)
else:
self._writeRawData(entry.name, data)
if entry.typeCmprsData == b'z' or entry.typeCmprsData == b'Z':
if entry.typeCmprsData == b"z" or entry.typeCmprsData == b"Z":
self._extractPyz(entry.name)
# Fix bare pyc's if any
self._fixBarePycs()
def _fixBarePycs(self):
    """Patch the pyc magic into files that were written before it was known.

    Each path in ``self.barePycList`` is opened in place and its leading
    bytes are overwritten with ``self.pycMagic``; the rest of the file is
    left untouched.
    """
    for pycPath in self.barePycList:
        # Keep the path and the file object under separate names so the
        # loop variable is not rebound by the ``with`` statement.
        with open(pycPath, "r+b") as pycHandle:
            # Overwrite the first four bytes
            pycHandle.write(self.pycMagic)
def _writePyc(self, filename, data):
    """Write ``data`` to ``filename`` preceded by a pyc header.

    The header starts with ``self.pycMagic``; the remaining fields are
    zero-filled and their layout depends on the Python version stored in
    ``self.pymaj`` / ``self.pymin``:

    * 3.7 and later: 4-byte bitfield + 8 bytes (timestamp and size, or hash)
    * 3.3 to 3.6:    4-byte timestamp + 4-byte size
    * older:         4-byte timestamp

    Args:
        filename: Path of the pyc file to create.
        data: Body bytes written after the header.
    """
    # Compare (major, minor) as a tuple: testing the two components
    # independently misclassifies versions such as 4.0.
    pyVersion = (self.pymaj, self.pymin)

    with open(filename, "wb") as pycFile:
        pycFile.write(self.pycMagic)  # pyc magic

        if pyVersion >= (3, 7):  # PEP 552 -- Deterministic pycs
            pycFile.write(b"\0" * 4)  # Bitfield
            pycFile.write(b"\0" * 8)  # (Timestamp + size) || hash
        else:
            pycFile.write(b"\0" * 4)  # Timestamp
            if pyVersion >= (3, 3):
                pycFile.write(b"\0" * 4)  # Size parameter added in Python 3.3

        pycFile.write(data)
def _extractPyz(self, name):
dirName = name + '_extracted'
dirName = name + "_extracted"
# Create a directory for the contents of the pyz
if not os.path.exists(dirName):
os.mkdir(dirName)
with open(name, 'rb') as f:
with open(name, "rb") as f:
pyzMagic = f.read(4)
assert pyzMagic == b'PYZ\0' # Sanity Check
assert pyzMagic == b"PYZ\0" # Sanity Check
pyzPycMagic = f.read(4) # Python magic value
pyzPycMagic = f.read(4) # Python magic value
if self.pycMagic == b'\0' * 4:
if self.pycMagic == b"\0" * 4:
self.pycMagic = pyzPycMagic
elif self.pycMagic != pyzPycMagic:
self.pycMagic = pyzPycMagic
print('[!] Warning: pyc magic of files inside PYZ archive are different from those in CArchive')
GeneralLogger.info(
"[!] Warning: pyc magic of files inside PYZ archive are different from those in CArchive"
)
# Skip PYZ extraction if not running under the same python version
if self.pymaj != sys.version_info.major or self.pymin != sys.version_info.minor:
print('[!] Warning: This script is running in a different Python version than the one used to build the executable.')
print('[!] Please run this script in Python {0}.{1} to prevent extraction errors during unmarshalling'.format(self.pymaj, self.pymin))
print('[!] Skipping pyz extraction')
if (
self.pymaj != sys.version_info.major
or self.pymin != sys.version_info.minor
):
GeneralLogger.info(
"[!] Warning: This script is running in a different Python version than the one used to build the executable."
)
GeneralLogger.info(
"[!] Please run this script in Python {0}.{1} to prevent extraction errors during unmarshalling".format(
self.pymaj, self.pymin
)
)
GeneralLogger.info("[!] Skipping pyz extraction")
return
(tocPosition, ) = struct.unpack('!i', f.read(4))
(tocPosition,) = struct.unpack("!i", f.read(4))
f.seek(tocPosition, os.SEEK_SET)
try:
toc = marshal.load(f)
except:
print('[!] Unmarshalling FAILED. Cannot extract {0}. Extracting remaining files.'.format(name))
return
except Exception:
GeneralLogger.info(
"[!] Unmarshalling FAILED. Cannot extract {0}. Extracting remaining files.".format(
name
)
)
print('[+] Found {0} files in PYZ archive'.format(len(toc)))
GeneralLogger.info("[+] Found {0} files in PYZ archive".format(len(toc)))
# From pyinstaller 3.1+ toc is a list of tuples
if type(toc) == list:
if isinstance(toc, list):
toc = dict(toc)
for key in toc.keys():
@ -417,18 +474,14 @@ class PyInstArchive:
try:
# for Python > 3.3 some keys are bytes object some are str object
fileName = fileName.decode('utf-8')
except:
pass
fileName = fileName.decode("utf-8")
except Exception:
...
# Prevent writing outside dirName
fileName = fileName.replace('..', '__').replace('.', os.path.sep)
fileName = fileName.replace("..", "__").replace(".", os.path.sep)
if ispkg == 1:
filePath = os.path.join(dirName, fileName, '__init__.pyc')
else:
filePath = os.path.join(dirName, fileName + '.pyc')
filePath = os.path.join(dirName, fileName + "{}.pyc".format("__init__" if ispkg == 1 else ""))
fileDir = os.path.dirname(filePath)
if not os.path.exists(fileDir):
@ -437,16 +490,21 @@ class PyInstArchive:
try:
data = f.read(length)
data = zlib.decompress(data)
except:
print('[!] Error: Failed to decompress {0}, probably encrypted. Extracting as is.'.format(filePath))
open(filePath + '.encrypted', 'wb').write(data)
except Exception:
GeneralLogger.info(
"[!] Error: Failed to decompress {0}, probably encrypted. Extracting as is.".format(
filePath
)
)
with open(filePath + ".encrypted", "wb") as f:
f.write(data)
else:
self._writePyc(filePath, data)
def main():
if len(sys.argv) < 2:
print('[+] Usage: pyinstxtractor.py <filename>')
print("[+] Usage: pyinstxtractor.py <filename>")
else:
arch = PyInstArchive(sys.argv[1])
@ -456,13 +514,17 @@ def main():
arch.parseTOC()
arch.extractFiles()
arch.close()
print('[+] Successfully extracted pyinstaller archive: {0}'.format(sys.argv[1]))
print('')
print('You can now use a python decompiler on the pyc files within the extracted directory')
return
GeneralLogger.info(
"[+] Successfully extracted pyinstaller archive: {0}\n".format(
sys.argv[1]
)
)
GeneralLogger.info(
"You can now use a python decompiler on the pyc files within the extracted directory"
)
arch.close()
if __name__ == '__main__':
if __name__ == "__main__":
main()