Web Surface Discovery
#Inputs
Start from a reviewed URL file, one URL per line. It should be derived from approved DNS and network targets rather than arbitrary internet-wide discovery.
#Paste-Ready Web Mapper
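Save as web-map.sh. Load engagement.env first, then run it with the reviewed URL file as its only argument, for example: bash web-map.sh approved-urls.txt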
#!/usr/bin/env bash
#
# web-map.sh - map the web surface of a reviewed list of URLs.
#
# Usage:       web-map.sh <approved-urls.txt>
# Environment: ENGAGEMENT_ROOT  must be non-empty
#              RAW_DIR          parent directory for the timestamped output tree
# Output:      $RAW_DIR/web-<UTC timestamp>/{probe,crawl,content,tls,screenshots}
set -Eeuo pipefail
umask 077  # results are readable by the operator only

# Print a diagnostic on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

[[ $# -eq 1 ]] || die "Usage: $0 <approved-urls.txt>"
[[ -f "$1" ]] || die "Input file not found: $1"
INPUT="$(realpath "$1")"
[[ -s "$INPUT" ]] || die "Input file is empty"

[[ -n "${ENGAGEMENT_ROOT:-}" ]] || die "Load engagement.env first"
# RAW_DIR is dereferenced below; check it explicitly so `set -u` does not
# abort with a bare "unbound variable" message.
[[ -n "${RAW_DIR:-}" ]] || die "RAW_DIR is not set"

# Verify every external tool the script invokes before creating any output,
# so a missing dependency does not leave an empty result tree behind.
# jq and sha256sum are used further down and were previously unchecked.
required=(httpx katana jq ffuf curl openssl sha256sum)
for tool in "${required[@]}"; do
  command -v "$tool" >/dev/null 2>&1 || die "Missing dependency: $tool"
done

OUT="$RAW_DIR/web-$(date -u +%Y%m%dT%H%M%SZ)"
mkdir -p "$OUT"/{probe,crawl,content,tls,screenshots}
# Stage 1: probe every approved URL. The flags request redirect following,
# status code, page title, technology detection, server header, IP, CNAME
# and TLS data, written as JSON lines.
probe_args=(
  -silent
  -l "$INPUT"
  -follow-redirects
  -status-code
  -title
  -tech-detect
  -server
  -ip
  -cname
  -tls-grab
  -json
  -rate-limit "${HTTP_RATE:-25}"
  -o "$OUT/probe/httpx.jsonl"
)
httpx "${probe_args[@]}"

# Stage 2: reduce the probe results to a de-duplicated list of URLs.
jq -r '.url // empty' "$OUT/probe/httpx.jsonl" \
  | sort -u > "$OUT/live-urls.txt"

# Stage 3: crawl the live URLs (depth 3, JavaScript crawling, robots.txt and
# sitemap.xml as known files) at the same request rate.
crawl_args=(
  -silent
  -list "$OUT/live-urls.txt"
  -depth 3
  -js-crawl
  -known-files robotstxt,sitemapxml
  -rate-limit "${HTTP_RATE:-25}"
  -output "$OUT/crawl/urls.txt"
)
katana "${crawl_args[@]}"
#######################################
# Derive the TLS endpoint to inspect for a URL.
# Arguments: $1 - absolute URL (scheme://authority/...)
# Outputs:   "<host> <port>" on stdout. The port is the URL's explicit port
#            for https URLs and 443 otherwise; IPv6 hosts are printed
#            without brackets.
# Returns:   1 if the URL has no scheme or no host.
#######################################
tls_endpoint() {
  local url=$1 scheme rest authority host port=""

  [[ "$url" == *://* ]] || return 1
  scheme=${url%%://*}
  rest=${url#*://}
  authority=${rest%%[/?#]*}   # cut at the first path, query or fragment char
  authority=${authority##*@}  # drop any userinfo

  if [[ "$authority" == \[* ]]; then
    # Bracketed IPv6 literal, optionally followed by :port.
    host=${authority#\[}
    host=${host%%\]*}
    port=${authority##*\]}
    port=${port#:}
  elif [[ "$authority" == *:* ]]; then
    host=${authority%%:*}
    port=${authority##*:}
  else
    host=$authority
  fi
  [[ -n "$host" ]] || return 1

  # An explicit port on a non-https URL is a plaintext listener, so the
  # default TLS port is probed instead.
  case "$scheme" in
    [Hh][Tt][Tt][Pp][Ss]) ;;
    *) port="" ;;
  esac

  printf '%s %s\n' "$host" "${port:-443}"
}

# For every live URL, save the response headers and the certificate details
# (subject, issuer, validity dates, SANs) under a name derived from the first
# 12 hex characters of the URL's SHA-256. Both captures are best-effort: a
# failure on one target must not stop the run.
while IFS= read -r url; do
  [[ -z "$url" ]] && continue
  id="$(printf '%s' "$url" | sha256sum | cut -c1-12)"

  # --max-time bounds the transfer so one stalled target cannot block the
  # remaining URLs; override with HTTP_TIMEOUT (seconds).
  curl -ksS --max-time "${HTTP_TIMEOUT:-30}" \
    -D "$OUT/probe/$id.headers" -o /dev/null "$url" || true

  endpoint="$(tls_endpoint "$url")" || continue
  host="${endpoint% *}"
  port="${endpoint##* }"
  if [[ "$host" == *:* ]]; then
    connect="[$host]:$port"  # IPv6 literals need brackets before the port
  else
    connect="$host:$port"
  fi

  printf '' | openssl s_client -connect "$connect" -servername "$host" 2>/dev/null \
    | openssl x509 -noout -subject -issuer -dates -ext subjectAltName \
      > "$OUT/tls/$id.txt" || true
done < "$OUT/live-urls.txt"
# Content discovery: fuzz one path segment below every live URL.
# The wordlist is taken from WEB_WORDLIST when set, otherwise the default
# path below; the whole stage is skipped when the file is absent.
WORDLIST="${WEB_WORDLIST:-/usr/share/seclists/Discovery/Web-Content/raft-small-words.txt}"
if [[ ! -f "$WORDLIST" ]]; then
  echo "Skipping content discovery; wordlist not found: $WORDLIST"
else
  while IFS= read -r url; do
    if [[ -z "$url" ]]; then
      continue
    fi
    # Result files are named after the first 12 hex chars of the URL's SHA-256.
    id="$(printf '%s' "$url" | sha256sum | cut -c1-12)"
    fuzz_args=(
      -s
      -ac
      -rate "${HTTP_RATE:-25}"
      -w "$WORDLIST"
      -u "${url%/}/FUZZ"   # strip one trailing slash so the join has a single /
      -of json
      -o "$OUT/content/$id.json"
    )
    # Best-effort: a failing target must not abort the remaining URLs.
    ffuf "${fuzz_args[@]}" || true
  done < "$OUT/live-urls.txt"
fi
# Screenshots are optional: taken only when gowitness is installed, and a
# gowitness failure is tolerated.
if command -v gowitness >/dev/null 2>&1; then
  shot_args=(
    scan file
    -f "$OUT/live-urls.txt"
    --screenshot-path "$OUT/screenshots"
    --write-db
  )
  gowitness "${shot_args[@]}" || true
fi

# Point the operator at the main result locations.
printf '%s\n' \
  "Review:" \
  " Live services: $OUT/probe/httpx.jsonl" \
  " Crawled URLs: $OUT/crawl/urls.txt" \
  " Content hits: $OUT/content/"
BASH
#Virtual Host Discovery
Use a known in-scope base domain and an approved IP. Filter by response size or automatic calibration, then manually verify candidates.
# Virtual-host discovery against a single approved IP.
# Set both variables before running; the command aborts with a message if
# either is missing, instead of sending requests to a placeholder:
#   APPROVED_IP  - the approved target IP
#   BASE_DOMAIN  - the known in-scope base domain (e.g. example.com)
# -ac enables automatic calibration; -rate 20 caps requests per second.
ffuf -ac -rate 20 \
  -w /usr/share/seclists/Discovery/DNS/subdomains-top1million-5000.txt \
  -u "http://${APPROVED_IP:?set APPROVED_IP to the approved target IP}/" \
  -H "Host: FUZZ.${BASE_DOMAIN:?set BASE_DOMAIN to the in-scope base domain}"
BASH
#Review Priorities
- Unexpected administrative interfaces and non-production environments.
- Certificate SANs that reveal additional in-scope names.
- Authentication boundaries, API documentation, GraphQL endpoints, and backup files.
- Technology versions that can be checked in Web Vulnerability Identification.
- Content that warrants an operator-selected technique from Web Attacks.