Web Surface Discovery

#Inputs

Start from a reviewed file of in-scope URLs, one URL per line. Derive the list from approved DNS and network targets, not from arbitrary internet-wide discovery.

#Paste-Ready Web Mapper

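Save the script below as web-map.sh. Load engagement.env first, then run it with the approved URL file as its only argument.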

#!/usr/bin/env bash
# web-map.sh - map the web surface of an approved URL list.
#
# Usage: web-map.sh <approved-urls.txt>
#
# Environment:
#   ENGAGEMENT_ROOT  must be non-empty (signals that engagement.env was loaded)
#   RAW_DIR          parent directory for the timestamped output tree
#                    (presumably exported by engagement.env - confirm)
#   HTTP_RATE        optional request rate passed to the scanners (default 25)
#   WEB_WORDLIST     optional wordlist path for content discovery
set -Eeuo pipefail
umask 077  # keep collected results private to the operator

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

[[ $# -eq 1 ]] || die "Usage: $0 <approved-urls.txt>"
[[ -n "${ENGAGEMENT_ROOT:-}" ]] || die "Load engagement.env first"
# RAW_DIR is dereferenced below; fail with a clear message instead of a
# bare "unbound variable" error from set -u.
[[ -n "${RAW_DIR:-}" ]] || die "RAW_DIR is not set; load engagement.env first"

# Verify every external tool the script calls before touching the filesystem,
# so a missing dependency does not leave an empty output tree behind.
required=(httpx katana ffuf curl openssl jq sha256sum realpath)
for tool in "${required[@]}"; do
  command -v "$tool" >/dev/null 2>&1 || die "Missing dependency: $tool"
done

INPUT="$(realpath -- "$1")"
[[ -s "$INPUT" ]] || die "Input file is missing or empty: $INPUT"

# One output tree per run, named with a UTC timestamp.
OUT="$RAW_DIR/web-$(date -u +%Y%m%dT%H%M%SZ)"
mkdir -p "$OUT"/{probe,crawl,content,tls,screenshots}

# Probe every approved URL and record the results as JSON lines.
http_rate="${HTTP_RATE:-25}"
httpx -silent -l "$INPUT" -follow-redirects -status-code -title -tech-detect \
  -server -ip -cname -tls-grab -json -rate-limit "$http_rate" \
  -o "$OUT/probe/httpx.jsonl"

# Reduce the probe results to a de-duplicated list of responding URLs.
# If nothing responded the result file may be absent or empty; produce an
# empty list in that case rather than letting jq abort the run under pipefail.
if [[ -s "$OUT/probe/httpx.jsonl" ]]; then
  jq -r '.url // empty' "$OUT/probe/httpx.jsonl" | sort -u > "$OUT/live-urls.txt"
else
  : > "$OUT/live-urls.txt"
fi

# Crawl only when there is something to crawl; later steps iterate over
# live-urls.txt and are harmless no-ops on an empty list.
if [[ -s "$OUT/live-urls.txt" ]]; then
  katana -silent -list "$OUT/live-urls.txt" -depth 3 -js-crawl \
    -known-files robotstxt,sitemapxml -rate-limit "$http_rate" \
    -output "$OUT/crawl/urls.txt"
else
  echo "No live URLs found; skipping crawl" >&2
fi

#######################################
# Derive the TLS endpoint of a URL.
# Arguments: $1 - URL
# Outputs:   "host:port" on stdout for https URLs; the port defaults to 443
#            and IPv6 literals keep their brackets
# Returns:   0 for an https URL with a parsable authority, 1 otherwise
#######################################
tls_endpoint() {
  local url=$1 authority
  local re_url='^[Hh][Tt][Tt][Pp][Ss]://([^/?#]*)'
  local re_v6='^(\[[^]]+])(:([0-9]+))?$'
  local re_host='^([^:]+)(:([0-9]+))?$'

  [[ "$url" =~ $re_url ]] || return 1
  authority="${BASH_REMATCH[1]##*@}"  # drop any userinfo
  if [[ "$authority" =~ $re_v6 || "$authority" =~ $re_host ]]; then
    printf '%s:%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[3]:-443}"
  else
    return 1
  fi
}

# For each live URL, save the response headers and, for https URLs, the
# certificate presented on the URL's own port. Output files are keyed by the
# first 12 hex characters of the URL's SHA-256. Both captures are best-effort:
# a failing host must not stop the run.
while IFS= read -r url; do
  [[ -n "$url" ]] || continue
  id="$(printf '%s' "$url" | sha256sum | cut -c1-12)"

  # Bounded timeouts so one stalled host cannot hang the whole loop.
  curl -ksS --connect-timeout 10 --max-time "${HTTP_TIMEOUT:-30}" \
    -D "$OUT/probe/$id.headers" -o /dev/null "$url" || true

  # Plain-http URLs have no certificate; do not touch a port that was not listed.
  endpoint="$(tls_endpoint "$url")" || continue
  sni="${endpoint%:*}"
  s_client_args=(-connect "$endpoint")
  # Send SNI for names only; a bracketed IPv6 literal is not a valid server name.
  [[ "$sni" == \[* ]] || s_client_args+=(-servername "$sni")

  # stdin comes from /dev/null so s_client exits after the handshake and does
  # not consume the loop's input; handshake noise on stderr is discarded.
  openssl s_client "${s_client_args[@]}" </dev/null 2>/dev/null \
    | openssl x509 -noout -subject -issuer -dates -ext subjectAltName \
    > "$OUT/tls/$id.txt" || true
done < "$OUT/live-urls.txt"

# Content discovery: one ffuf run per live URL, results stored as JSON named
# after the first 12 hex characters of the URL's SHA-256. WEB_WORDLIST
# overrides the default wordlist; when the list is absent the step is skipped.
WORDLIST="${WEB_WORDLIST:-/usr/share/seclists/Discovery/Web-Content/raft-small-words.txt}"
if [[ ! -f "$WORDLIST" ]]; then
  echo "Skipping content discovery; wordlist not found: $WORDLIST"
else
  while IFS= read -r target; do
    [[ -n "$target" ]] || continue
    digest="$(printf '%s' "$target" | sha256sum)"
    id="${digest:0:12}"
    ffuf_args=(
      -s -ac
      -rate "${HTTP_RATE:-25}"
      -w "$WORDLIST"
      -u "${target%/}/FUZZ"
      -of json
      -o "$OUT/content/$id.json"
    )
    # A failed run against one target must not end the whole sweep.
    ffuf "${ffuf_args[@]}" || true
  done < "$OUT/live-urls.txt"
fi

# Screenshots are optional: taken only when gowitness is on PATH, and a
# failure there never fails the run.
if command -v gowitness >/dev/null 2>&1; then
  shots_dir="$OUT/screenshots"
  gowitness scan file -f "$OUT/live-urls.txt" \
    --screenshot-path "$shots_dir" --write-db || true
fi

# Point the operator at the main artefacts of this run.
printf '%s\n' \
  "Review:" \
  "  Live services: $OUT/probe/httpx.jsonl" \
  "  Crawled URLs:  $OUT/crawl/urls.txt" \
  "  Content hits:  $OUT/content/"

BASH

#Virtual Host Discovery

Replace example.com with a known in-scope base domain and <approved-ip> with an approved IP address. Filter by response size (-fs) or automatic calibration (-ac), then manually verify each candidate.

# Replace <approved-ip> and example.com before running. The URL is quoted so
# the angle-bracket placeholder is never parsed as shell redirection.
ffuf -ac -rate 20 \
  -w /usr/share/seclists/Discovery/DNS/subdomains-top1million-5000.txt \
  -u 'http://<approved-ip>/' -H 'Host: FUZZ.example.com'

BASH

#Review Priorities

Unexpected administrative interfaces and non-production environments.
Certificate SANs that reveal additional in-scope names.
Authentication boundaries, API documentation, GraphQL endpoints, and backup files.
Technology versions that can be checked in Web Vulnerability Identification.
Content that warrants an operator-selected technique from Web Attacks.