# unused_strings.py
#
# Simple Python script that uses the yaramodv4 module and its ObservingVisitor
# interface to identify unused strings in YARA rulesets. It takes one
# argument - the path to the file with rules in which unused strings will be
# identified. If the YARA file is invalid (it contains a syntax or semantic
# error), the script ends with return code 1.
#
# Requirements: yaramodv4
#
#
# USAGE:
# python3 unused_strings.py <path>
#
#
# EXAMPLE:
#
# test.tmp:
# ```
# rule a { strings: $s00 = "abc" $s01 = "def" condition: true }
# rule b { 
#       strings: $h00 = "abc" $s00 = "def" $s01 = "ghi" 
#       condition: all of them 
# }
# rule c { 
#       strings: $h00 = "abc" $s00 = "def" $s01 = "ghi" 
#       condition: $h00 or $s01
# }
# ```
#
# Output:
# ```
# --- Unused strings in file ../test.tmp ---
# a:
# s00
# s01
# b:
# c:
# s00
# --- Done (0.0015494823455810547 s) ---
# ```
#
# Author: Vojtěch Dvořák


import sys
import yaramodv4 as y
import time


class UnusedStringsIdentifier(y.ObservingVisitor):
    """Visitor that finds the strings of a rule its condition never references.

    `identify` seeds `string_cache` with the ids of all strings declared in
    a rule and then visits the rule's condition; every `visit_*` callback
    below drops the ids it sees from the cache, so whatever is left when
    the visit finishes was not referenced.
    """

    def __init__(self):
        y.ObservingVisitor.__init__(self)
        # Ids of the strings of the current rule not seen in its condition yet
        self.string_cache : list[str] = []

    def identify(self, rule : y.Rule) -> list[str]:
        """Returns ids of the strings of `rule` that its condition does not use

        The ids keep the order in which `rule.strings` yields them.
        """
        declared_ids = []
        for declared in rule.strings:
            declared_ids.append(declared.id)
        self.string_cache = declared_ids

        # `start` is not defined in this class - presumably it is inherited
        # from ObservingVisitor and walks the expression, invoking the
        # visit_* callbacks below
        self.start(rule.condition)
        return self.string_cache

    def visit_string_expr(self, e):
        """Drops the id of `e` from the string cache (no-op if it is absent)"""
        try:
            self.string_cache.remove(e.id)
        except ValueError:
            # The id was already removed or does not belong to this rule
            pass

    def visit_String(self, e):
        """Marks the referenced string as used"""
        self.visit_string_expr(e)

    def visit_StringCount(self, e):
        """Marks the referenced string as used"""
        self.visit_string_expr(e)

    def visit_StringOffset(self, e):
        """Marks the referenced string as used"""
        self.visit_string_expr(e)

    def visit_StringMatchLength(self, e):
        """Marks the referenced string as used"""
        self.visit_string_expr(e)

    def visit_StringWildcard(self, e):
        """Marks every string whose id starts with the wildcard prefix as used"""
        # The last character of the wildcard id is cut off - presumably it
        # is the trailing '*'
        prefix = e.id[:-1]
        # Slice assignment filters the existing list object in place
        self.string_cache[:] = [
            cached for cached in self.string_cache
            if not cached.startswith(prefix)
        ]

    def visit_Them(self, _):
        """Marks all strings as used by emptying the string cache"""
        self.string_cache = []


if __name__ == "__main__":
    # The path to the file with YARA rules is the only required argument
    if len(sys.argv) < 2:
        print("Missing path!")
        # sys.exit is used instead of the bare exit(): the latter is a helper
        # added by the site module and is missing e.g. under `python -S`
        sys.exit(1)

    # NOTE(review): nothing here handles a failed parse; per the file header
    # an invalid file should end the script with return code 1 - presumably
    # parse_file raises and the uncaught exception produces that exit code
    parser = y.Yaramod()
    yara_src = parser.parse_file(sys.argv[1])

    visitor = UnusedStringsIdentifier()

    print(f"--- Unused strings in file {sys.argv[1]} ---")
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() can jump when the system clock is adjusted
    start = time.perf_counter()

    for rule in yara_src.all_rules:
        print(f"{rule.id}:")

        # Identify unused strings for every rule and print one id per line
        unused_strings = visitor.identify(rule)
        for u_string in unused_strings:
            print(u_string)

    end = time.perf_counter()
    print(f"--- Done ({end - start} s) ---")
    
