/*
 * This object keeps track of which websites Trustable can handle, and how to extract the necessary
 * information from each.
 *
 * Each target (e.g. 'amazon') specifies a set of target page types. Each page type has a
 * regex to use on the URL to see if it's a match. There's also an 'injection_site' that specifies
 * where any new content will be placed. Finally, there's a collection of data that we want to get
 * from each page type (e.g. product title, description, brand for a detail page).
 *
 * It does this via jQuery selectors (sorry, jQuery dependency here...):
 * the 'selector' attribute stores the jQuery selector string that retrieves a block of HTML,
 * and the 'extractor' is run on the 'client side' (i.e. inside the iFrame). There might
 * be an XSS vulnerability here, but that would require the host site sending malicious code
 * (e.g. Amazon attacking us) and I'm using jQuery's .parseHTML with a virtual DOM.
 * See https://www.nickang.com/2017-12-14-prevent-jquery-loading-image-parsehtml/
 *
 * Great tool for testing, https://regex101.com/
 */
import log from 'loglevel';

export type TargetPageType = {
  name: string;
  purpose: string;
  enable_mobile: boolean;
  doc_title_cleaner: (title: string) => string;
  show_recs: boolean;
  show_reviews: boolean;
  url_match: RegExp;
  canonicalUrlMaker?: (url: string, product_id: string) => string;
  injection_site?: string;
  data?: {
    id: string;
    selector: string;
    extractor: (html: Node[]) => string | null;
  }[];
  url_param_keys?: {
    [key: string]: string; // ie 'query' => 'k'
  };
  url_params?: {
    [key: string]: string; // ie 'query' => 'k'
  };
  product_selector?: string;
};

type Target = {
  name: string;
  product_id_type: string;
  page_types: {
    [key: string]: TargetPageType;
  };
};

type Targets = {
  [key: string]: Target;
};

export type ParsedPage = {
  knownSite: boolean;
  knownPageType: boolean;
  pagePurpose: string;
  show_recs: boolean;
  show_reviews: boolean;
  url: string;
  canonicalUrl: string;
  searchParams?: URLSearchParams;
  site?: string;
  siteInfo?: Target;
  page_type?: string;
  pageTypeInfo?: TargetPageType;
  product_id_type: string;
  product_id: string;
};

export const targets: Targets = {
  amazon: {
    name: 'Amazon',
    product_id_type: 'asin',
    page_types: {
      search: {
        name: 'Search Page',
        purpose: 'search',
        enable_mobile: false,
        show_recs: true,
        show_reviews: false,
        doc_title_cleaner: (title: string) => title,
        /* Search page examples
         * https://www.amazon.com/s?k=items&i=hpc&crid=3HY11Z5HBC5MF&sprefix=item%2Chpc%2C150&ref=nb_sb_noss_1
         */
        url_match: /www\.amazon\.com\/(?:s\?|s\/)/,
        injection_site: '.s-matching-dir:first-child > .sg-col-inner',
        url_param_keys: {
          query: 'k',
        },
        product_selector: "div[data-asin!=''][data-asin]", // See https://stackoverflow.com/questions/10641258/jquery-select-data-attributes-that-arent-empty
      },
      dp: {
        name: 'Detail Page',
        purpose: 'product',
        enable_mobile: false,
        doc_title_cleaner: (title: string) =>
          title.replace(/^Amazon\.com: /, ''),
        show_recs: false, // For now...
        show_reviews: true,
        /** Detail page examples
         * https://www.amazon.com/Kimi-Cuisine-FOAMING-Soap-Dispenser/dp/B07GKLDS3S
         * https://www.amazon.com/Kimi-Cuisine-FOAMING-Soap-Dispenser/dp/B07GKLDS3S?th=1
         * https://www.amazon.com/Kimi-Cuisine-FOAMING-Soap-Dispenser/dp/B07GKLDS3S#some-anchor
         * https://www.amazon.com/dp/B07XZ1ZKK7/ref=ppx_yo_dt_b_asin_title_o04_s00?ie=UTF8&th=1
         * https://www.amazon.com/gp/product/B07XZ1ZKK7/ref=ppx_yo_dt_b_asin_title_o04_s00?ie=UTF8&th=1
         * https://www.amazon.com/Sporzon-Colored-Neoprene-Dumbbell-Multiple/dp/B088M3VTND/ref=sr_1_5?crid=17S2RZL1YQ2FG&keywords=weights&qid=1648080365&s=sporting-goods&sprefix=weight%2Csporting%2C128&sr=1-5
         * NOTE: did not work on: https://www.amazon.com/Atomic-Habits-Proven-Build-Break/dp/0735211299/?_encoding=UTF8&pd_rd_w=gb59b&content-id=amzn1.sym.64be5821-f651-4b0b-8dd3-4f9b884f10e5&pf_rd_p=64be5821-f651-4b0b-8dd3-4f9b884f10e5&pf_rd_r=XTXSA5QD2VEFXCZY60N0&pd_rd_wg=Pk90l&pd_rd_r=f7556ec5-b4c0-4772-84a0-1b02f1c0811d&ref_=pd_gw_crs_zg_bs_283155
         *  Would only work if 0735211299 is something like B735211299
         */
        url_match:
          /www\.amazon\.com.*\/(?:dp|gp\/product)\/(B\w{9})(?:\/|\?|#|$)/,
        canonicalUrlMaker: (product_id_type: string, product_id: string) =>
          `https://www.amazon.com/dp/${product_id}`,
        injection_site: '#ATFCriticalFeaturesDataContainer',
        data: [
          {
            id: 'image_url',
            selector: '#imgTagWrapperId img',
            extractor: (r) =>
              r?.[0] instanceof Element ? r[0].getAttribute('src') : null,
          },
          {
            id: 'title',
            selector: '#productTitle',
            extractor: (r) => r?.[0].textContent?.trim() || null,
          },
          {
            id: 'product_description',
            selector: '#productDescription',
            extractor: (r) => r?.[0].textContent?.trim() || null,
          },
          {
            id: 'brand',
            selector: '#productOverview_feature_div',
            // See https://codesandbox.io/s/jquery-playground-forked-gud0no?file=/src/index.js
            extractor: (r) =>
              (r?.[0] instanceof Element
                ? [...r[0].querySelectorAll('span')]
                    .filter((e) => e.textContent === 'Brand')?.[0]
                    .parentElement?.nextElementSibling?.textContent?.trim()
                : null) || null,
          },
        ],
      },
    },
  },

  target: {
    name: 'Target',
    product_id_type: 'reddot',
    page_types: {
      dp: {
        name: 'Detail Page',
        purpose: 'product',
        enable_mobile: true,
        doc_title_cleaner: (title: string) =>
          title.replace(/: Target$/, '').trim(),
        show_recs: false, // For now...
        show_reviews: true,
        url_match: /^https:\/\/www\.target\.com\/p\//,
        canonicalUrlMaker: (product_id_type: string, product_id: string) =>
          `https://www.target.com/p/${product_id}`,
        injection_site: 'NOT SET',
        data: [],
      },
    },
  },

  walmart: {
    name: 'Walmart',
    product_id_type: 'bluedot',
    page_types: {
      dp: {
        name: 'Detail Page',
        purpose: 'product',
        enable_mobile: true,
        doc_title_cleaner: (title: string) =>
          title.replace(/- Walmart.com$/, '').trim(),
        show_recs: false, // For now...
        show_reviews: true,
        url_match: /^https:\/\/www\.walmart\.com\/ip\/(.*\/)/,
        canonicalUrlMaker: (product_id_type: string, product_id: string) =>
          `https://www.walmart.com/ip/${product_id}`,
        injection_site: 'NOT SET',
        data: [],
      },
      search: {
        name: 'Search Page',
        purpose: 'search',
        enable_mobile: false,
        show_recs: false,
        show_reviews: false,
        doc_title_cleaner: (title: string) => title,
        url_match: /^https:\/\/www\.walmart\.com\/search/,
        injection_site: 'NOT SET',
        url_param_keys: {
          query: 'q',
        },
      },
    },
  },

  // Can happen during testing :)
  localhost: {
    name: 'Localhost',
    product_id_type: 'NA',
    page_types: {
      all: {
        name: 'Generic Page',
        url_match: /http:\/\/localhost/,
        doc_title_cleaner: (title: string) => title,
        enable_mobile: false,
        purpose: 'generic',
        show_recs: false,
        show_reviews: false,
      },
    },
  },
  /* Disable saving any Google pages as product recommendations */
  google: {
    name: 'Google',
    product_id_type: 'tbd',
    page_types: {
      search: {
        name: 'Search Page',
        url_match: /www\.google\.com\/search/,
        doc_title_cleaner: (title: string) => title,
        injection_site: '#taw',
        purpose: 'search',
        enable_mobile: true,
        show_recs: true,
        show_reviews: false,
        url_param_keys: {
          query: 'q',
        },
      },
      // make sure this comes last
      home: {
        name: 'Home Page',
        url_match: /www\.google\.com/,
        doc_title_cleaner: (title: string) => title,
        purpose: 'generic',
        enable_mobile: false,
        show_recs: false,
        show_reviews: false,
      },
    },
  },
};

/*
 * Get the product identifier, site type,
 */
export function parseExtensionUrl(url: string) {
  let page_type = null; // String with the id of the page type
  let site = null; // String with the id of the site
  let pageTypeInfo: TargetPageType | null = null;
  const searchParams = new URL(url).searchParams;

  // Find matching site and page_type
  for (const site_name in targets) {
    for (const pt in targets[site_name].page_types) {
      const t = targets[site_name].page_types[pt];
      if (t.url_match.test(url)) {
        site = site_name;
        page_type = pt;
        // Object.assign(pageTypeInfo, t); // Make a shallow copy
        pageTypeInfo = { ...t }; // Make a shallow copy
        break;
      }
    }
  }

  const product_id_type = site ? targets[site]?.product_id_type : null;
  const product_id = pageTypeInfo?.url_match?.exec(url)?.[1];
  const canonicalUrl =
    page_type === 'dp' && product_id && product_id_type
      ? pageTypeInfo?.canonicalUrlMaker?.(product_id_type, product_id)
      : url.split('?')[0];

  // If we don't recognize the site
  if (site === null || !pageTypeInfo) {
    const ans: ParsedPage = {
      knownSite: false,
      knownPageType: false,
      pagePurpose: 'unknown',
      show_recs: false,
      show_reviews: undefined, // not false! False is for sure we don't want to show
      url,
      canonicalUrl: canonicalUrl ?? '',
      product_id_type: 'url',
      product_id: canonicalUrl ? window.btoa(canonicalUrl) : '',
    };
    log.info('parseExtensionUrl: URL not recognized');
    log.info(ans);
    return ans;
  }

  if (pageTypeInfo.url_param_keys) {
    // Pull out the necessary URL parameters into standard named spots
    // example url_param_keys objects: {query: 'k'}
    pageTypeInfo.url_params = {};
    for (const param in pageTypeInfo.url_param_keys) {
      pageTypeInfo.url_params[param] =
        searchParams.get(pageTypeInfo.url_param_keys[param]) ?? '';
    }
  }

  const ans: ParsedPage = {
    knownSite: site !== null,
    knownPageType: page_type !== null,
    pagePurpose: pageTypeInfo.purpose,
    show_recs: pageTypeInfo.show_recs,
    show_reviews: pageTypeInfo.show_reviews,
    url,
    canonicalUrl: canonicalUrl ?? '',
    searchParams,
    site,
    siteInfo: targets[site],
    page_type: page_type ?? '', // e.g. 'search' or 'dp'
    pageTypeInfo,
    product_id_type: product_id_type ?? '',
    product_id: product_id ?? '',
  };

  return ans;
}
