1
+ import markdown
2
+ from bs4 import BeautifulSoup , NavigableString , Tag
3
+ import string
4
+
5
+
6
class Helper:
    """Utilities for converting GitHub-Flavored Markdown sections into JSON-friendly structures."""

    @staticmethod
    def parse_gfm_section(html_content):
        """
        Parse a GitHub-Flavored Markdown section containing a table and surrounding content.
        Returns a dict with "before_html", "columns", "rows_html", and "after_html".
        """
        rendered = markdown.markdown(html_content, extensions=['extra'])
        soup = BeautifulSoup(rendered, "html.parser")

        table = soup.find('table')
        if table is None:
            # No table present: the whole rendered fragment counts as "before".
            return {"before_html": rendered, "columns": [], "rows_html": [], "after_html": ''}

        # find_previous_siblings() yields nearest-first, so restore document order.
        preceding = list(table.find_previous_siblings())
        preceding.reverse()
        before_html = ''.join(str(node) for node in preceding)

        # Siblings after the table are already in document order.
        after_html = ''.join(str(node) for node in table.find_next_siblings())

        # Header cells supply the column names.
        columns = [header.get_text(strip=True) for header in table.find_all('th')]

        # Every row past the first (the header row) contributes its raw <td> HTML.
        body_rows = [
            [str(cell) for cell in row.find_all('td')]
            for row in table.find_all('tr')[1:]
        ]

        return {
            "before_html": before_html,
            "columns": columns,
            "rows_html": body_rows,
            "after_html": after_html
        }

    @staticmethod
    def parse_cell(html_td):
        """Convert a table cell HTML into plain text or a dict for links/images."""
        fragment = BeautifulSoup(html_td, "html.parser")
        anchor = fragment.find('a')
        if anchor is None:
            # Plain cell: return just the stripped text content.
            return fragment.get_text(strip=True)

        cell = {"url": anchor.get('href', '')}
        image = anchor.find('img')
        if image is not None:
            # Image link: record the image attributes alongside the anchor text.
            cell["img_src"] = image.get('src', '')
            cell["title"] = image.get('title', '')
            cell["link_text"] = anchor.get_text(strip=True)
        else:
            # Text-only link.
            cell["link_text"] = anchor.get_text(strip=True)
        return cell

    @staticmethod
    def parse_html_parts(html_fragment):
        """
        Convert an HTML fragment into a list of parts.
        Each part is either:
          - {"text": "..."}
          - {"link": "url", "text": "..."}
          - {"img_src": "url", "alt": "...", "title": "..."}
        """
        soup = BeautifulSoup(html_fragment, 'html.parser')
        parts = []

        def visit(node):
            # Depth-first walk that flattens text, links, and images into `parts`.
            if isinstance(node, NavigableString):
                stripped = str(node).strip()
                # Skip whitespace-only and pure-punctuation runs.
                if stripped and any(ch not in string.punctuation for ch in stripped):
                    parts.append({"text": stripped})
                return
            if not isinstance(node, Tag):
                return
            if node.name == 'a':
                parts.append({
                    "link": node.get('href', ''),
                    "text": node.get_text(strip=True)
                })
            elif node.name == 'img':
                parts.append({
                    "img_src": node.get('src', ''),
                    "alt": node.get('alt', ''),
                    "title": node.get('title', '')
                })
            else:
                # Recurse into children for any other nested tag.
                for child in node.children:
                    visit(child)

        for top_level in soup.contents:
            visit(top_level)

        return parts

    @staticmethod
    def section_to_json(section_result):
        """
        Convert a parsed section into structured JSON.
        Returns {"before": [...], "table": [...], "after": [...]}.
        """
        columns = section_result.get('columns', [])
        # Pair each row's parsed cells with the header names.
        table_rows = [
            dict(zip(columns, [Helper.parse_cell(cell_html) for cell_html in row_html]))
            for row_html in section_result.get('rows_html', [])
        ]

        return {
            "before": Helper.parse_html_parts(section_result.get('before_html', '')),
            "table": table_rows,
            "after": Helper.parse_html_parts(section_result.get('after_html', ''))
        }