"""
clean_js_agent.py
------------------
Scans JavaScript (and optionally HTML/PHP) files for patterns commonly used by
JS:Agent-ENC trojans (obfuscated bootstrapper that fetches/executes payloads),
then optionally removes those injected blocks while creating backups.

⚠️ Disclaimer: Heuristics may remove some legitimate minified/obfuscated code.
Run without --fix first (scan-only is the default) and review the report
before applying --fix.
"""

import argparse
import os
import re
import shutil
from datetime import datetime

# Name prefix of the backup directories created by main(); iter_files() skips
# directories with this prefix so backups are never scanned or "cleaned".
BACKUP_DIR_PREFIX = 'backup_js_agent_'

# Upper bound on how many injected blocks are stripped from a single file.
MAX_REMOVAL_PASSES = 5

# --- Suspicious pattern detectors -------------------------------------------------

# Gate often used by injectors: ;if(typeof X==="undefined"){ ... }
_TYPEOF_GATE_RX = re.compile(
    r";\s*if\s*\(\s*typeof\s+[A-Za-z_$][\w$]*\s*===\s*['\"]undefined['\"]\s*\)\s*\{",
    re.DOTALL,
)

# Generic IIFE start: (function(...){
_IIFE_START_RX = re.compile(r"\(function\s*\([^)]*\)\s*\{", re.DOTALL)

# Right after an IIFE body: `)(`, `).call(` or `).apply(` (stops before the '(').
_IIFE_WRAPPED_CALL_RX = re.compile(r"\s*\)\s*(?:\.\s*(?:call|apply)\s*)?(?=\()")

# The ')' that closes `(function(){...}(args))`.
_CLOSE_PAREN_RX = re.compile(r"\s*\)")

SUSPICIOUS_REGEXES = {
    # Gate often used by injectors: if(typeof X==="undefined"){ ... }
    'typeof_undefined_gate': _TYPEOF_GATE_RX,
    # Specific to the analysed sample ("nqdq" gate). Kept to increase precision.
    'nqdq_gate': re.compile(
        r";\s*if\s*\(\s*typeof\s+nqdq\s*===\s*['\"]undefined['\"]\s*\)\s*\{",
        re.DOTALL,
    ),
    # Obfuscation trick: while(!![]) loop
    'while_true_array': re.compile(r"while\s*\(\s*!!\[\]\s*\)", re.DOTALL),
    # Heavy parseInt noise within try/catch loop
    'parseInt_noise': re.compile(
        r"try\s*\{[^}]*parseInt\([^)]*\)[^}]*\}", re.DOTALL
    ),
    # Custom Base64/URL decoders: fromCharCode followed by decodeURIComponent,
    # or any atob(...) call.
    'custom_decoder': re.compile(
        r"fromCharCode[\s\S]{0,400}decodeURIComponent|atob\s*\([^)]*\)\s*;?",
        re.IGNORECASE,
    ),
    # Encoded strings presence
    'encoded_hex_unicode': re.compile(r"\\x[0-9A-Fa-f]{2}|\\u[0-9A-Fa-f]{4}"),
    # XMLHttpRequest wrapper followed by a token()/rand() beacon. The
    # alternation is grouped so a bare rand() no longer matches by itself and
    # the match always starts at "XMLHttpRequest".
    'xmlhttp_wrapper': re.compile(
        r"XMLHttpRequest[\s\S]{0,600}(?:token\s*\(\)|rand\s*\(\))",
        re.IGNORECASE,
    ),
    # Self-modifying/dynamic execution
    'eval_function': re.compile(
        r"\beval\s*\(|\bnew\s+Function\s*\(", re.IGNORECASE
    ),
    # Sample identifiers often present in this injection
    'sample_idents': re.compile(r"\b(a0E|a0l|HttpClient|token|rand)\b"),
}

AGENT_START_PATTERNS = [
    _TYPEOF_GATE_RX,
    _IIFE_START_RX,  # generic IIFE start
]


# --- Helpers ---------------------------------------------------------------------

def find_suspicious_indicators(text):
    """Return the names of all SUSPICIOUS_REGEXES that match ``text``.

    Args:
        text: File content (or a fragment of it) to inspect.

    Returns:
        List of indicator names, in SUSPICIOUS_REGEXES order; empty when
        nothing matches.
    """
    return [name for name, rx in SUSPICIOUS_REGEXES.items() if rx.search(text)]


def _balanced_end(text, open_idx, open_ch, close_ch):
    """Return the index just past the delimiter closing ``text[open_idx]``.

    Purely counts ``open_ch``/``close_ch``; delimiters inside string literals
    or comments are counted too. Returns -1 when no balancing delimiter exists.
    """
    depth = 0
    for i in range(open_idx, len(text)):
        ch = text[i]
        if ch == open_ch:
            depth += 1
        elif ch == close_ch:
            depth -= 1
            if depth == 0:
                return i + 1
    return -1


def _block_end(text, start_idx):
    """Return the index just past the first balanced ``{...}`` at/after start_idx, or -1."""
    brace_start = text.find('{', start_idx)
    if brace_start == -1:
        return -1
    return _balanced_end(text, brace_start, '{', '}')


def remove_block_by_brace_balance(text, start_idx):
    """Remove ``text[start_idx:]`` up to the end of its first balanced brace block.

    The first '{' at or after ``start_idx`` opens the block; everything from
    ``start_idx`` through the matching '}' (inclusive) is removed.

    Args:
        text: Source text.
        start_idx: Index where the removal starts.

    Returns:
        ``(new_text, removed)``; ``removed`` is False (and ``text`` is returned
        unchanged) when there is no '{' or the braces never balance.
    """
    block_end = _block_end(text, start_idx)
    if block_end == -1:
        return text, False
    return text[:start_idx] + text[block_end:], True


def _iife_tail_end(text, body_end):
    """Return the index just past the invocation that follows an IIFE body.

    Recognises ``})(args)``, ``}).call(args)``, ``}).apply(args)`` and
    ``}(args))``. A trailing ';' is intentionally not consumed so the
    neighbouring statements stay separated. Returns ``body_end`` unchanged
    when no invocation is recognised.
    """
    wrapped = _IIFE_WRAPPED_CALL_RX.match(text, body_end)
    if wrapped:
        call_end = _balanced_end(text, wrapped.end(), '(', ')')
        return call_end if call_end != -1 else body_end

    i = body_end
    while i < len(text) and text[i].isspace():
        i += 1
    if i < len(text) and text[i] == '(':
        call_end = _balanced_end(text, i, '(', ')')
        if call_end != -1:
            closing = _CLOSE_PAREN_RX.match(text, call_end)
            if closing:
                return closing.end()
    return body_end


def _next_injection_span(text):
    """Locate the next block to strip; return ``(start, end)`` or None.

    A typeof-undefined gate (the exact 'nqdq' gate first) is always taken.
    A generic IIFE is only taken when its own text trips at least one
    indicator, so clean IIFEs in the same file are left alone.
    """
    gate = SUSPICIOUS_REGEXES['nqdq_gate'].search(text) or _TYPEOF_GATE_RX.search(text)
    if gate:
        end = _block_end(text, gate.start())
        return (gate.start(), end) if end != -1 else None

    for m in _IIFE_START_RX.finditer(text):
        body_end = _block_end(text, m.start())
        if body_end == -1:
            continue
        if find_suspicious_indicators(text[m.start():body_end]):
            return m.start(), _iife_tail_end(text, body_end)
    return None


def remove_obfuscated_iife(text):
    """Strip the common obfuscated JS:Agent-ENC injection blocks from ``text``.

    Finds a gate like ``;if(typeof X==="undefined"){...}`` or a suspicious
    anonymous IIFE and removes its full balanced block (for an IIFE, including
    the invocation that follows the body). Repeats up to MAX_REMOVAL_PASSES
    times in case of multiple injections.

    Args:
        text: Source text.

    Returns:
        ``(new_text, removed_any)``.
    """
    removed_any = False
    cur_text = text
    for _ in range(MAX_REMOVAL_PASSES):
        span = _next_injection_span(cur_text)
        if span is None:
            break
        start, end = span
        cur_text = cur_text[:start] + cur_text[end:]
        removed_any = True
    return cur_text, removed_any


# --- Core scanning/cleaning logic -------------------------------------------------

def _strip_injection(content):
    """Apply the removal strategies in order; return ``(new_content, cleaned)``."""
    # 1) Structured removal (gate / suspicious IIFE).
    new_content, removed = remove_obfuscated_iife(content)
    if removed:
        return new_content, True

    # 2) Fallback: cut from the '(function' preceding while(!![]) up to the
    #    next '})();'. Only '})()' is removed; the ';' is left in place.
    m = SUSPICIOUS_REGEXES['while_true_array'].search(content)
    if m:
        start_func = content.rfind('(function', 0, m.start())
        end_call = content.find('})();', m.end())
        if start_func != -1 and end_call != -1:
            return content[:start_func] + content[end_call + len('})()'):], True

    # 3) Extra attempt: remove the brace block opened just before the first
    #    XMLHttpRequest beacon.
    m2 = SUSPICIOUS_REGEXES['xmlhttp_wrapper'].search(content)
    if m2:
        brace_prev = content.rfind('{', 0, m2.start())
        if brace_prev != -1:
            new_content, removed2 = remove_block_by_brace_balance(content, brace_prev)
            if removed2:
                return new_content, True

    return content, False


def process_file(path, root, fix=False, backup_dir=None):
    """Scan one file and, when ``fix`` is set, strip detected injections.

    The file is read and written as UTF-8 with ``surrogateescape`` and without
    newline translation, so undecodable bytes and the original line endings
    survive the rewrite. When ``backup_dir`` is given, the original is copied
    there (same path relative to ``root``) before it is overwritten; if that
    copy fails the file is left untouched.

    Args:
        path: File to process.
        root: Scan root, used to compute the backup's relative path.
        fix: When False only report; when True rewrite cleaned files.
        backup_dir: Directory receiving backups, or None for no backup.

    Returns:
        Dict with 'path', and either 'error' (read/backup/write failure) or
        'indicators' and 'cleaned'. Backup/write failures carry all three.
    """
    try:
        with open(path, 'r', encoding='utf-8', errors='surrogateescape', newline='') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        return {'path': path, 'error': f'read_error: {e}'}

    indicators = find_suspicious_indicators(content)
    if not (fix and indicators):
        return {'path': path, 'indicators': indicators, 'cleaned': False}

    new_content, cleaned = _strip_injection(content)
    if not cleaned:
        return {'path': path, 'indicators': indicators, 'cleaned': False}

    if backup_dir:
        rel = os.path.relpath(path, start=root)
        backup_path = os.path.join(backup_dir, rel)
        try:
            os.makedirs(os.path.dirname(backup_path), exist_ok=True)
            shutil.copy2(path, backup_path)
        except OSError as e:
            # Never overwrite the original when its backup could not be made.
            return {'path': path, 'error': f'backup_error: {e}',
                    'indicators': indicators, 'cleaned': False}

    try:
        with open(path, 'w', encoding='utf-8', errors='surrogateescape', newline='') as f:
            f.write(new_content)
    except OSError as e:
        return {'path': path, 'error': f'write_error: {e}',
                'indicators': indicators, 'cleaned': False}

    return {'path': path, 'indicators': indicators, 'cleaned': True}


# --- CLI -------------------------------------------------------------------------

def iter_files(root, exts, skip_dir_prefix=BACKUP_DIR_PREFIX):
    """Yield paths under ``root`` whose names end with one of ``exts``.

    Args:
        root: Directory to walk recursively.
        exts: Iterable of extensions (e.g. ``['.js']``), matched
            case-insensitively.
        skip_dir_prefix: Directories whose name starts with this prefix are
            not descended into (defaults to the backup directory prefix);
            pass a falsy value to walk everything.
    """
    suffixes = tuple(ext.lower() for ext in exts)
    for dirpath, dirnames, filenames in os.walk(root):
        if skip_dir_prefix:
            # In-place pruning stops os.walk from descending into backups.
            dirnames[:] = [d for d in dirnames if not d.startswith(skip_dir_prefix)]
        for fn in filenames:
            if fn.lower().endswith(suffixes):
                yield os.path.join(dirpath, fn)


def main(args):
    """Run the scan (and optional fix) described by ``args`` and print a report.

    Args:
        args: Namespace with ``root``, ``fix`` and ``include_html``.

    Raises:
        SystemExit: When ``args.root`` is not an existing directory.
    """
    if not os.path.isdir(args.root):
        raise SystemExit(f"error: root directory not found: {args.root}")

    exts = ['.js']
    if args.include_html:
        exts += ['.html', '.htm', '.php']

    # Prepare backup directory if fixing
    backup_dir = None
    if args.fix:
        ts = datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_dir = os.path.join(args.root, f"{BACKUP_DIR_PREFIX}{ts}")
        os.makedirs(backup_dir, exist_ok=True)

    total = 0
    infected = 0
    cleaned = 0
    results = []

    for path in iter_files(args.root, exts):
        total += 1
        r = process_file(path, root=args.root, fix=args.fix, backup_dir=backup_dir)
        results.append(r)
        if r.get('indicators', []):
            infected += 1
        if r.get('cleaned'):
            cleaned += 1

    # Reporting
    print(f"\nScan complete. Files scanned: {total}")
    print(f"Files with suspicious indicators: {infected}")
    if args.fix:
        print(f"Files cleaned: {cleaned}")
        print(f"Backups saved to: {backup_dir}")

    # Detailed listing
    for r in results:
        path = r['path']
        inds = r.get('indicators', [])
        status = 'CLEANED' if r.get('cleaned') else ('INFECTED' if inds else 'OK')
        err = r.get('error')
        if err:
            print(f"[ERROR] {path}: {err}")
        elif inds:
            print(f"[{status}] {path} -> {', '.join(inds)}")
        else:
            print(f"[{status}] {path}")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Scan and optionally clean JS:Agent-ENC style injections in JS/HTML/PHP files.'
    )
    parser.add_argument('root', help='Root directory to scan')
    parser.add_argument('--fix', action='store_true',
                        help='Apply cleaning to infected files (creates backups)')
    parser.add_argument('--include-html', action='store_true',
                        help='Also scan HTML/HTM/PHP files for inline JS')
    args = parser.parse_args()
    main(args)