Pseudo-Isomorphic Substrings

QUESTION

Two strings A and B, consisting of small English alphabet letters are called pseudo-isomorphic if\n\nTheir lengths are equal\nFor every pair (i,j), where 1 <= i < j <= |A|, B[i] = B[j], iff A[i] = A[j]\nFor every pair (i,j), where 1 <= i < j <= |A|, B[i] != B[j] iff A[i] != A[j]\nNaturally, we use 1-indexation in these definitions and |A| denotes the length of the string A.\n\nYou are given a string S, consisting of no more than 105 lowercase alphabetical characters. For every prefix of S denoted by S’, you are expected to find the size of the largest possible set of strings , such that all elements of the set are substrings of S’ and no two strings inside the set are pseudo-isomorphic to each other.\n\nif S = abcde \nthen, 1st prefix of S is ‘a’ \nthen, 2nd prefix of S is ‘ab’ \nthen, 3rd prefix of S is ‘abc’ \nthen, 4th prefix of S is ‘abcd’ and so on..\n\nInput Format\n\nThe first and only line of input will consist of a single string S. The length of S will not exceed 10^5.\n\nConstraints\n1. 1<=|S|<=10^5\n2.S contains only lower-case english alphabets (‘a’ – ‘z’).\nOutput Format\n\nOutput N lines. On the ith line, output the size of the largest possible set for the first i alphabetical characters of S such that no two strings in the set are pseudo-isomorphic to each other.

ANSWER

#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <map>
#include <vector>

using namespace std;

#define LOGMAX 17
#define NMAX 131072
#define DEBUG 0
#define USE_FILES 0

int qans, qmin, qmax, nactsuff, pmin, pmax, N;
long long cans, sumlcp, sumactsuff;
int bit[LOGMAX + 1], logidx[NMAX];
int rmq[2][NMAX][LOGMAX], lcp[NMAX];
char S[NMAX];

void compute_bits() {
	int i, j;
	bit[0] = 1;

	for (i = 1; i <= LOGMAX; i++)
		bit[i] = bit[i - 1] << 1;

	logidx[1] = 0;

	for (i = 2, j = 1; i < NMAX; i++) {
		if (i == bit[j + 1]) j++;
		logidx[i] = j;
	}
}

void pre_compute_RMQ() {
	int i, j;

	for (i = 1; i < N; i++) {
		rmq[0][i][0] = rmq[1][i][0] = lcp[i];

		for (j = 1; j < LOGMAX && i - bit[j] >= 0; j++) {
			rmq[0][i][j] = rmq[0][i - bit[j - 1]][j - 1];

			if (rmq[0][i][j - 1] < rmq[0][i][j])
				rmq[0][i][j] = rmq[0][i][j - 1];
		}
	}

	for (i = N - 1; i >= 1; i--) {
		for (j = 1; j < LOGMAX && i + bit[j] <= N; j++) {
			rmq[1][i][j] = rmq[1][i + bit[j - 1]][j - 1];

			if (rmq[1][i][j - 1] < rmq[1][i][j])
				rmq[1][i][j] = rmq[1][i][j - 1];
		}
	}
}

void get_query() {
	int idx = logidx[qmax - qmin + 1];
	qans = rmq[0][qmax][idx];
	if (rmq[1][qmin][idx] < qans)
		qans = rmq[1][qmin][idx];
}

void read_input() {
	if (USE_FILES)
		freopen("x.txt", "r", stdin);
	scanf("%s", S + 1);
	N = strlen(S + 1);
}

void reverse_S() {
	int i;
	char c;
	for (i = 1; i <= N / 2; i++) {
		c = S[i];
		S[i] = S[N - i + 1];
		S[N - i + 1] = c;
	}
}

int D[NMAX];
map<char, int> lastpoz, nextpoz;
map<char, int>::iterator it;
vector<int> out[NMAX];
vector<pair<int, char> > vpoz;
int cidx[NMAX][30];

void pre_process_str() {
	int i, j, k;

	for (i = 1; i <= N; i++) {
		D[i] = i - lastpoz[S[i]];
		lastpoz[S[i]] = i;

		for (j = i - D[i] + 1; j <= i; j++)
			out[j].push_back(i);
	}

	if (DEBUG) {
		fprintf(stderr, "D:");

		for (i = 1; i <= N; i++)
			fprintf(stderr, " %d", D[i]);

		fprintf(stderr, "\n");
	}

	char ch;

	for (ch = 'a'; ch <= 'z'; ch++)
		nextpoz[ch] = N + 1;

	for (i = N; i >= 1; i--) {
		nextpoz[S[i]] = i;
		vpoz.clear();

		for (it = nextpoz.begin(); it != nextpoz.end(); it++)
			vpoz.push_back(make_pair(it->second, it->first));
		sort(vpoz.begin(), vpoz.end());

		for (j = 0; j < vpoz.size(); j++)
			cidx[i][vpoz[j].second - 'a'] = j;
	}
}

int group[LOGMAX + 1][NMAX], vs[NMAX], vsrev[NMAX], vsaux[NMAX], r, ii, jj, kk;

int LCP(int s1, int s2) {
	if (group[r][s1] == group[r][s2]) {
		int p = s1 + bit[r], q = s2 + bit[r], lenmax, u, poz;

		if (p > N || q > N) return bit[r];

		if (group[r][p] != group[r][q]) {
			qmin = min(vsrev[p], vsrev[q]);
			qmax = max(vsrev[p], vsrev[q]) - 1;
			get_query();
			lenmax = bit[r] + qans;
		} else
			lenmax = bit[r + 1];

		for (u = 0; u < out[p].size(); u++) {
			poz = out[p][u];

			if (poz - s1 >= lenmax) break;

			if (poz - D[poz] >= s1 && D[poz - s1 + s2] != D[poz]) {
				lenmax = poz - s1;
				break;
			}
		}

		for (u = 0; u < out[q].size(); u++) {
			poz = out[q][u];

			if (poz - s2 >= lenmax) break;

			if (poz - D[poz] >= s2 && D[poz - s2 + s1] != D[poz]) {
				lenmax = poz - s2;
				break;
			}
		}

		return lenmax;
	} else {
		qmin = min(vsrev[s1], vsrev[s2]);
		qmax = max(vsrev[s1], vsrev[s2]) - 1;
		get_query();
		return qans;
	}
}

void compute_LCPs() {
	int i, j, p, q, lenmax, u, poz;

	for (i = 1; i < N; i++)
		lcp[i] = LCP(vs[i], vs[i + 1]);

	if (DEBUG)
		for (i = 1; i < N; i++)
			fprintf(stderr, "i=%d: LCP(%d,%d)=%d\n", i, vs[i], vs[i + 1], lcp[i]);
}

int d1, d2, clcp;

inline int compare_suffixes(int s1, int s2) {
	clcp = LCP(s1, s2);
	d1 = s1 + clcp;
	d2 = s2 + clcp;

	if (d1 > N) return +1;
	if (d2 > N) return -1;
	return (cidx[s1][S[d1] - 'a'] - cidx[s2][S[d2] - 'a']);
}

void merge_sort(int li, int ls) {
	if (li >= ls) return;

	int mid = (li + ls) >> 1;

	merge_sort(li, mid);
	merge_sort(mid + 1, ls);

	ii = li; jj = mid + 1; kk = li - 1;

	while (ii <= mid && jj <= ls) {
		kk++;

		if (compare_suffixes(vs[ii], vs[jj]) <= 0) {
			vsaux[kk] = vs[ii];
			ii++;
		} else {
			vsaux[kk] = vs[jj];
			jj++;
		}
	}

	for (; ii <= mid; ii++) {
		kk++;
		vsaux[kk] = vs[ii];
	}

	for (; jj <= ls; jj++) {
		kk++;
		vsaux[kk] = vs[jj];
	}

	for (kk = li; kk <= ls; kk++)
		vs[kk] = vsaux[kk];
}

void suffix_array() {
	int i, j, ng = 1;

	for (i = 1; i <= N; i++) {
		vs[i] = i;
		vsrev[i] = i;
		lcp[i] = 1;
		group[0][i] = 0;
	}

	for (r = 0; r < LOGMAX; r++) {
		pre_compute_RMQ();
		merge_sort(1, N);

		ng = 0;
		group[r + 1][vs[1]] = 0;

		compute_LCPs();
		for (i = 2; i <= N; i++) {
			if (lcp[i - 1] != bit[r + 1])
				ng++;
			group[r + 1][vs[i]] = ng;
		}

		ng++;

		for (i = 1; i <= N; i++)
			vsrev[vs[i]] = i;

		if (DEBUG) {
			fprintf(stderr, "Suffix array after round %d/%d:", r + 1, LOGMAX);
			for (i = 1; i <= N; i++)
				fprintf(stderr, " %d(%d)", vs[i], group[r + 1][vs[i]]);
			fprintf(stderr, "\n");
		}
	}
}

int cnt[2 * NMAX];

void UpdateCnt(int idx, int v) {
	sumactsuff += v * vs[idx];
	nactsuff += v;
	idx = NMAX + idx - 1;
	while (idx >= 1) {
		cnt[idx] += v;
		idx >>= 1;
	}
}

int GetPred(int idx) {
	idx = NMAX + idx - 1;
	int p;
	while (idx > 1) {
		p = idx >> 1;
		if ((idx & 1) == 1 && cnt[p] > cnt[idx]) {
			idx--;
			break;
		}
		idx = p;
	}

	if (idx == 1)
		return 0;

	while (idx < NMAX) {
		idx <<= 1;
		if (cnt[idx + 1] >= 1)
			idx++;
	}

	return idx - NMAX + 1;
}

int GetSucc(int idx) {
	idx = NMAX + idx - 1;
	int p;
	while (idx > 1) {
		p = idx >> 1;
		if ((idx & 1) == 0 && cnt[p] > cnt[idx]) {
			idx++;
			break;
		}
		idx = p;
	}

	if (idx == 1)
		return 0;

	while (idx < NMAX) {
		idx <<= 1;
		if (cnt[idx] == 0)
			idx++;
	}

	return idx - NMAX + 1;
}

#define QSIZE 10000000

int qactsuff[QSIZE], nextqactsuff[QSIZE], startactsuff[NMAX], nq;
int idxmax[NMAX];

int update_LCPPairState(int s1, int s2, int coef) {
	int idx, result = 2;
	qmin = s1; qmax = s2 - 1;
	get_query();

	if (qans == 0)
		return 0;

	s1 = vs[s1]; s2 = vs[s2];
	if (s1 > s2) {
		idx = s1; s1 = s2; s2 = idx;
		result = 1;
	}

	idx = s2 + qans - 1;
	if (idx > pmax)
		return result;

	sumlcp += coef * qans;
	if (coef == 1) {
		if (result == 2)
			qmin = qmax + 1;
		if (idx > idxmax[qmin]) {
			nq++;
			if (nq >= QSIZE) {
				fprintf(stderr, "!! QSIZE exceeded\n");
				exit(1);
			}
			qactsuff[nq] = qmin;
			nextqactsuff[nq] = startactsuff[idx];
			startactsuff[idx] = nq;
			idxmax[qmin] = idx;
		}
	}
	return 0;
}

void update_state(int s, int coef) {
	int pred, succ, p, res, removed;

	pred = GetPred(s);
	succ = GetSucc(s);

	if (DEBUG)
		fprintf(stderr, "  suffix: s=%d pred=%d succ=%d\n", s, pred, succ);

	if (pred > 0 && succ > 0)
		update_LCPPairState(pred, succ, -1);

	removed = 0;
	while (pred > 0) {
		res = update_LCPPairState(pred, s, 1);
		if (!res)
			break;
		if (res == 1) {
			UpdateCnt(pred, -1);
			p = GetPred(pred);
			if (p > 0)
				update_LCPPairState(p, pred, -1);
			pred = p;
		} else {
			removed = 1;
			break;
		}
	}

	if (!removed) {
		while (succ > 0) {
			res = update_LCPPairState(s, succ, coef);
			if (!res)
				break;
			if (res == 2) {
				UpdateCnt(succ, -1);
				p = GetSucc(succ);
				if (p > 0)
					update_LCPPairState(succ, p, -1);
				succ = p;
			} else {
				removed = 1;
				break;
			}
		}
	}

	if (removed) {
		pred = GetPred(s);
		succ = GetSucc(s);
		if (pred > 0 && succ > 0)
			update_LCPPairState(pred, succ, 1);
	}
}

void process_op() {
	int opidx;

	sumlcp = sumactsuff = 0;
	nactsuff = 0;
	pmin = N + 1;
	pmax = N;

	for (opidx = 1; opidx <= N; opidx++) {
		pmin--;
		UpdateCnt(vsrev[pmin], 1);
		update_state(vsrev[pmin], 1);
		cans = (long long) (pmax + 1) * (long long) nactsuff;
		cans -= sumactsuff;
		cans -= sumlcp;
		if (DEBUG)
			fprintf(stderr, "op=%d pmin=%d suff=%d cans=%lld pmax=%d nactsuff=%d sumactsuff=%lld sumlcp=%lld\n", opidx, pmin, vsrev[pmin], cans, pmax, nactsuff, sumactsuff, sumlcp);
		printf("%lld\n", cans);
	}
}

int main() {
	compute_bits();
	read_input();
	reverse_S();
	pre_process_str();
	suffix_array();
	pre_compute_RMQ();
	process_op();
	return 0;
}

Ads Blocker Image Powered by Code Help Pro

Ads Blocker Detected!!!

We have detected that you are using extensions to block ads. Please support us by disabling these ads blocker.