(ns codescene.features.util.url
  "Generating and working with URLs. For building URLs you should create a parts representation,
  edit that and reconstitute an URL from that.

  1. When a parts of a URL is converted to a String, the parameters and paths need to be
  URL encoded, so that the params can contain special characters.

  2. When incoming request URL is parsed by the server, http-kit doesn't decode anything. However, the params
  are decoded by ring.middleware.params/wrap-params middleware.

  3. Because http-kit doesn't decode path that means Compojure routes on encoded paths. So if you
  have '/%' path you need to match '/%25'. The reason to not just decode everything is that it makes
  parsing inputs impossible. Once you decode /project/x%2Fy/repos, you aget /project/x/y/repos which
  is a completely different path.

  4. If URL is kept in String form, it should always have parts properly encoded.

  5. If URI starts with //, such as //path, that // is considered scheme and host separator,
     https://codescene.freshdesk.com/a/tickets/3329, so that is a path of 'path' without host or schema.
     We don't really want to support that kind of URL, and I don't think we will ever need that kind of URL,
     so we will just reduce those slashes to 1. Note that we are talking about URLs here, if these were
     file path URIs, we would have to support this."
  (:require [clojure.string :as str]
            [evolutionary-metrics.mining.file-patterns :as file-patterns]
            [medley.core :as m]
            [ring.util.codec :as codec]
            [ring.util.request :as ring-request]
            [taoensso.timbre :as log])
  (:import (java.net URI URISyntaxException URL)))

(defn ^URI to-uri
  "Tries to cast `url` to java.net.URI throwing if it's an incompatible value
  Strings, java.net.URI and java.net.URL instances are supported.
  nils or blank strings are converted to an empty string and then used to construct URI.

  URL starting with // is treated differently, see NS doc."
  [url]
  (cond
    ;; replace any spaces with empty string - otherwise URI. would throw
    (or (nil? url) (string? url)) (URI. (if (str/blank? url) "" (str/replace url #"^/+" "/")))
    (instance? URI url) url
    (instance? URL url) (.toURI ^URL url)
    :else (throw (ex-info "Invalid url" {:url url}))))

(defn url-param-map
  "Returns URL param map.

  - no ? in URL returns nil
  - just ? will return {}
  - key-value query ?k1=v1&k2=v2, returns a map
  - string query (excluding empty) such as ?q, will return map with one key and nil value
  - a mix of both will return a map as above"
  [url]
  (let [params (some-> (.getRawQuery (to-uri url)) codec/form-decode)]
    (if (string? params)
      (if (= "" params) {} {(keyword params) nil})
      (some-> params (update-keys keyword)))))

(defn url->parts
  "Parses URL into data. Do NOT feed this some sort of URL string you've constructed
  from Ring Request map, unless you know exactly what you're doing.

  Key :path has is a vector of path segments, which are separated by '/'.
  A segment can be an empty string. Note that in most cases first element of path vector
  will be empty string, as most paths start with a `/`, observe:

  http://localhost -> [] ; absolute URL without path
  http://localhost/a -> ['', 'a'] ; absolute URL with a path
  http://localhost/ -> ['', '']; another absolute URL with a path
  /path1 -> ['', 'path1'] ; Root-relative URI
  path1 -> ['path1'] ; Document-relative URI

  "
  [url]
  (let [uri (to-uri url)
        [user password] (some-> (.getRawUserInfo uri) (str/split #":" 2))
        path (.getRawPath uri)]
    (->> {:scheme (.getScheme uri)
          :port (when (pos-int? (.getPort uri)) (.getPort uri))
          :path (if (seq path)
                  (->> (str/split path #"/" -1)
                       (mapv codec/url-decode)
                       not-empty)
                  [])
          :user (some-> user codec/url-decode)
          :password (some-> password codec/url-decode)
          :host (.getHost uri)
          :query-params (url-param-map uri)
          :fragment (some-> (.getRawFragment uri) codec/form-decode-str)}
         (m/filter-vals some?))))

(defn parts->url
  "Returns URL string constructed from url parts. Query params only supports
  list params by repeated query keys. Note that parts of the URL are encoded."
  [{:keys [scheme port path user password host query-params fragment]}]
  (let [user-pass (cond-> ""
                    user (str (codec/url-encode user))
                    password (str ":" (codec/url-encode password))
                    (or user password) (str "@"))
        authority (when host
                    (cond-> (str user-pass host)
                      port (str ":" port)))]
    (cond-> ""
      scheme (str scheme "://")
      authority (str authority)
      (and (not-empty (first path))
           (not-empty authority)) (str "/")
      (seq path) (str (str/join "/" (map codec/url-encode path)))
      query-params (str "?"
                        (if (and (= 1 (count query-params))
                                 (nil? (val (first query-params))))
                          (name (key (first query-params)))
                          (codec/form-encode query-params)))
      fragment (str "#" (codec/form-encode fragment)))))

(defn update-url
  "Converts url to parts, runs f(parts p1 p2 p3 ...) then assembles a string representation again."
  [url f & more]
  (parts->url (apply f (url->parts url) more)))

(defn base-url
  "Returns the base part of given url, including protocol, host and port
  but excluding path, query string, fragment et al.
  It is expected to be used only for absolute URLs;
  returns nil for relative URLs."
  [url]
  (let [ret (update-url url dissoc :path :query-params :fragment)]
    (when-not (= "" ret) ret)))

(defn relative-url
  "Returns path, query and fragment. Note that this is different from the canonical
  URI implementation, where any URI without a scheme is not absolute."
  [url]
  (some-> url (update-url dissoc :host :scheme)))

(defn request-url
  "Relative request url."
  [req] (relative-url (ring-request/request-url req)))

(defn post-request->request-url
  "Request URL but form params will be emitted as query params.

   This is useful generating a redirect (GET) URL from a POST request."
  [req]
  ;; skip anti-forgery-token
  (let [form-params (m/filter-keys #(not (str/starts-with? % "_")) (:form-params req))]
    (some-> (ring-request/request-url req)
            (update-url update :query-params merge form-params)
            relative-url)))

(defn try-relative-url
  "Returns path, query and fragment. Note that this is different from the canonical
  URI implementation, where any URI without a scheme is not absolute.

  Returns nil if URL is invalid."
  [uri]
  (try
    (relative-url uri)
    (catch URISyntaxException e
      (log/warnf "Invalid 'next' URI '%s', must be a valid URI; reason: %s" uri (.getMessage e)))))

(defn s3-url [bucket-name object-key]
  (str "s3://" bucket-name "/" object-key))

(def ^:private root-segment-regex #"(/[^/]*).*")
(defn first-path-segment [uri-path]
  (second (re-find root-segment-regex uri-path)))
(comment
  (first-path-segment "/v2/ahoj")
  ;; => "/v2"
  .)

(defn to-absolute-url
  "Given a base-uri (string) and a relative or absolute url (string)
  returns it's absolute version (a string).
  Assumptions:
  - Relative URLs must begin with `/`.
  - base-uri cannot be empty.
  - url can be nil or empty string in which case base-uri is returned.
  - Neither of arguments can be nil.
  See also :base-uri in cloud/web/resources/config/config.edn.

  Further notes: This function can be especially useful for redirects (\"Location\" header).
  This is because Jetty automatically replaces relative URLs in redirects with absolute URLs.
  This makes problems in tests and also can be problematic with load balancers terminating HTTPS.
  See these for more information:
    - https://stackoverflow.com/questions/25652718/do-relative-server-side-redirects-respect-the-protocol,
    - https://datatracker.ietf.org/doc/html/rfc2616#section-10.
    - the updated HTTP 1.1 specifications (RFC 7231): https://datatracker.ietf.org/doc/html/rfc7231
      - relaxed the original constraint, allowing the use of relative URLs in Location headers."
  [base-uri url]
  (let [uri (to-uri url)]
    (str (.resolve (URI. base-uri)
                   uri))))

(defn same-host?
  "Checks whether two URIs point have the same protocol+host+port part,
  ignoring path, query string, etc.
  See
  https://docs.oracle.com/javase/8/docs/api/java/net/URI.html
  and check the unit test."
  [url1 url2]
  (= (base-url url1) (base-url url2)))

(defn add-params
  "`param-pairs` should be a map. Special characters will be encoded."
  [uri param-map]
  (update-url uri update :query-params merge param-map))

(defn path-encode
  "Don't call this directly."
  [v]
  (let [encode (comp codec/url-encode str)]
    (if (vector? v)
      (when (not-empty v) (str/join \/ (mapv encode v)))
      (encode v))))

(defn merge-path-vec
  "Merge path vectors, in a way where it won't produce // in the middle."
  [path1 path2]
  (-> (cond-> path1
        (= "" (last path1)) butlast)
      (concat (cond-> path2
                (= "" (first path2)) next))
      vec))

(defn merge-path-vec-prefix
  "Merge path vectors, in a way where it won't produce // in the middle.

  It will also look if one path is strict prefix of another and just append the rest:
  e.g. /api/v4 + /api/v4/projects -> /api/v4/projects, but /api/v4/groups + /api/v4/person will produce
  /api/v4/projects/api/v4/person"
  [path1 path2]
  (let [p1 (drop-while empty? path1)
        p2 (drop-while empty? path2)]
    (if (every? true? (map = p1 p2))
      (merge-path-vec path1 (drop (count p1) p2))
      (merge-path-vec path1 path2))))

(defmacro make-url
  "Concatenates base plus parts using / into a path. It will somewhat sanitize the inputs.

  See to-url-test function.

  Values are treated as such:
  - base value has any trailing slashes removed
  - string parts have slashes trimmed on both sides, except if last part is string,
  then it only has slash trimmed on left
  - other parts (such as variables) are wrapped in a string conversion and
  url encode (which encodes characters invalid in paths).
  - if variable value is a vector, it will treat each element as a separate path segment

  e.g. (to-url 'https://google.com/' x '/x/') where x is '//' is 'https://google.com/%2F%2F/x/'"
  [base & parts]
  (let [final-slash? (and (string? (last parts))
                          (.endsWith ^String (last parts) "/"))
        processed-parts (mapv #(if (string? %)
                                 (-> %
                                     (file-patterns/triml-ch \/)
                                     (file-patterns/trimr-ch \/))
                                 `(path-encode ~%))
                              parts)]
    `(let [path# (cons (some-> ~base (file-patterns/trimr-ch \/))
                       ~(cond-> processed-parts final-slash? (conj "")))]
       (str/join \/ (remove nil? path#)))))