# %% [markdown]
# # CWE Data
# ---

# %% tags=["remove-cell"]
from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
import datetime
from datetime import date
import glob
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import warnings
import calplot
from itables import init_notebook_mode, show
import itables.options as opt


# itables defaults used by every interactive table in this report.
opt.dom = "tpir"
opt.style = "table-layout:auto;width:auto"
init_notebook_mode(all_interactive=True, connected=True)


@register_cell_magic
def markdown(line, cell):
    """Cell magic: render the cell body as Markdown.

    The body is passed through ``str.format`` with the notebook globals, so
    ``{per_day}`` and similar placeholders are replaced by their current values.
    """
    return Markdown(cell.format(**globals()))


logging.getLogger('matplotlib.font_manager').disabled = True
warnings.filterwarnings("ignore")
pd.set_option('display.width', 500)
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 10)

# %% tags=["remove-cell"]
NVD_GLOB = 'nvd.jsonl'     # file(s) holding the NVD export; each is read with json.load
MISSING = 'Missing_Data'   # placeholder stored when a record lacks a field


def _dig(obj, path, default):
    """Walk ``path`` (dict keys and list indexes) into ``obj``.

    Returns ``default`` when any step is absent: a missing key, an empty or
    too-short list, or a non-container value (e.g. ``None``) part-way down.
    """
    try:
        for step in path:
            obj = obj[step]
    except (KeyError, IndexError, TypeError):
        return default
    return obj


def _cve_row(entry):
    """Flatten one NVD record into the dict used as a DataFrame row.

    Only the CVE id is mandatory (a record without it raises ``KeyError``);
    every other field falls back to a placeholder when it is absent.
    """
    cve = entry['cve']
    # Only the first CVSS v3.1 metric of a record is used.
    metric = _dig(cve, ('metrics', 'cvssMetricV31', 0), {})

    def cvss(field, default=MISSING):
        return _dig(metric, ('cvssData', field), default)

    return {
        'CVE': cve['id'],
        'Published': _dig(cve, ('published',), MISSING),
        'AttackVector': cvss('attackVector'),
        'AttackComplexity': cvss('attackComplexity'),
        'PrivilegesRequired': cvss('privilegesRequired'),
        'UserInteraction': cvss('userInteraction'),
        'Scope': cvss('scope'),
        'ConfidentialityImpact': cvss('confidentialityImpact'),
        'IntegrityImpact': cvss('integrityImpact'),
        'AvailabilityImpact': cvss('availabilityImpact'),
        # '0.0' (not MISSING) so the column stays numeric; zeros become NaN below.
        'BaseScore': cvss('baseScore', '0.0'),
        'BaseSeverity': cvss('baseSeverity'),
        'ExploitabilityScore': _dig(metric, ('exploitabilityScore',), MISSING),
        'ImpactScore': _dig(metric, ('impactScore',), MISSING),
        'CWE': _dig(cve, ('weaknesses', 0, 'description', 0, 'value'), MISSING),
        'Description': _dig(cve, ('descriptions', 0, 'value'), ''),
        'Assigner': _dig(cve, ('sourceIdentifier',), MISSING),
        'Tag': _dig(cve, ('cveTags', 0, 'tags'), np.nan),
        'Status': _dig(cve, ('vulnStatus',), ''),
    }


nvd_files = glob.glob(NVD_GLOB)
if not nvd_files:
    # Fail with a clear message instead of a NameError on `nvd` further down.
    raise FileNotFoundError(f"No NVD data file matching {NVD_GLOB!r} in the working directory")

row_accumulator = []
for filename in nvd_files:
    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
    row_accumulator.extend(_cve_row(entry) for entry in nvd_data)

# Build the frame once, after all files are read, and derive every column in a
# single chain so nothing is assigned onto a filtered view.
nvd = (
    pd.DataFrame(row_accumulator)
    .loc[lambda df: ~df['Status'].str.contains('Rejected', na=False)]
    .assign(Published=lambda df: pd.to_datetime(df['Published']))
    .sort_values(by=['Published'])
    .reset_index(drop=True)
    .assign(BaseScore=lambda df: pd.to_numeric(df['BaseScore']).replace(0, np.nan))
)

nvdcount = nvd['Published'].count()
nvdunique = nvd['Published'].nunique()
startdate = date(2000, 1, 1)
enddate = date.today()
numberofdays = enddate - startdate
per_day = nvdcount/numberofdays.days

# %% [markdown] tags=["hide-input"]
#
#
# ## CWE Data

# %% tags=["remove-input"]
# One row per CWE with the number of CVE records that carry it.
nvd_cwe = nvd['CWE'].value_counts().rename_axis('CWE').reset_index(name='CVEs')
# Chart only CWEs with more than 100 CVEs, capped at the 25 most frequent.
nvd_cwe_graph = nvd_cwe.loc[nvd_cwe['CVEs'] > 100].head(25)

fig, ax = plt.subplots(figsize=(10, 10))
ax.barh("CWE", "CVEs", data=nvd_cwe_graph, color="#001d82")
ax.set_xlabel("CVEs")
ax.set_ylabel("CWE")
ax.set_title("Most Common CWE in CVE Records");

# %% tags=["remove-input"]
show(nvd_cwe, scrollY="400px", scrollCollapse=True, paging=False)

# %% tags=["remove-input"]
Markdown(f"This report is updated automatically every day, last generated on: **{datetime.datetime.now()}**")