433. 最小基因变化¶
难度:中等
题目¶
一条基因序列由一个带有8个字符的字符串表示,其中每个字符都属于 "A"
, "C"
, "G"
, "T"
中的任意一个。
假设我们要调查一个基因序列的变化。一次基因变化意味着这个基因序列中的一个字符发生了变化。
例如,基因序列由 "AACCGGTT"
变化至 "AACCGGTA"
即发生了一次基因变化。
与此同时,每一次基因变化的结果,都需要是一个合法的基因串,即该结果属于一个基因库。
现在给定3个参数 — start
, end
, bank
,分别代表起始基因序列,目标基因序列及基因库,请找出能够使起始基因序列变化为目标基因序列所需的最少变化次数。如果无法实现目标变化,请返回 -1
。
注意:
- 起始基因序列默认是合法的,但是它并不一定会出现在基因库中。
- 如果一个起始基因序列需要多次变化,那么它每一次变化之后的基因序列都必须是合法的。
- 假定起始基因序列与目标基因序列是不一样的。
示例 1:
start: "AACCGGTT"
end: "AACCGGTA"
bank: ["AACCGGTA"]
返回值: 1
示例 2:
start: "AACCGGTT"
end: "AAACGGTA"
bank: ["AACCGGTA", "AACCGCTA", "AAACGGTA"]
返回值: 2
示例 3:
start: "AAAAACCC"
end: "AACCCCCC"
bank: ["AAAACCCC", "AAACCCCC", "AACCCCCC"]
返回值: 3
题解¶
使用广度优先算法遍历基因的所有可能改动情况。基因序列长度为8,每次可能的改动情况为\(8\times 3 = 24\)种,在这24种改动情况中选择满足如下条件的结果:
- 结果是合法的基因
- 结果未被遍历过
使用哈希表存储基因库和已经被遍历过的基因。
char *genes = "TGCATGCA";
int getCode(char *s)
{
int ret = 0;
switch (*s)
{
case 'A':
ret++;
case 'C':
ret++;
case 'G':
ret++;
case 'T':
break;
}
return ret;
}
int hash(char *s)
{
char *cur = s;
int ret = 0;
while (*cur)
{
ret <<= 2;
ret += getCode(cur);
cur++;
}
return ret;
}
char *dehash(int o)
{
char *ret = (char *)malloc(sizeof(char) * 9);
ret[8] = 0;
for (int i = 7; i >= 0; i--)
{
ret[i] = genes[o & 3];
o >>= 2;
}
return ret;
}
struct Bank {
char *bitmap;
};
void deleteBank(struct Bank *dest)
{
free(dest->bitmap);
free(dest);
}
void removeGene(struct Bank *dest, char *seq)
{
int i = hash(seq);
dest->bitmap[i >> 3] &= ~(1 << (i & 7));
}
void addGene(struct Bank *dest, char *seq)
{
int i = hash(seq);
dest->bitmap[i >> 3] |= 1 << (i & 7);
}
bool queryGene(struct Bank *dest, char *seq)
{
int i = hash(seq);
return dest->bitmap[i >> 3] & (1 << (i & 7));
}
void merge(struct Bank *dest, struct Bank *src)
{
int i = 0;
for (i = 0; i < 8192; i++)
dest->bitmap[i] |= src->bitmap[i];
deleteBank(src);
}
struct Bank *getBank(char **bank, int bankSize)
{
struct Bank *ret = (struct Bank *)malloc(sizeof(struct Bank));
ret->bitmap = (char *)malloc(sizeof(char) * 8192);
memset(ret->bitmap, 0, sizeof(char) * 8192);
for (int i = 0; i < bankSize; i++)
addGene(ret, bank[i]);
return ret;
}
struct Bank *step(char *current, char *end, struct Bank *passed, struct Bank *fullBank, int *count)
{
int startHash = hash(current);
struct Bank *ret = getBank(NULL, 0);
addGene(passed, current);
for (int i = 0; i < 8; i++)
{
for (int j = 0; j < 4; j++)
{
current[i] = genes[getCode(current + i) + 1];
if (startHash == hash(current))
continue;
if (!queryGene(fullBank, current))
continue;
if (queryGene(passed, current))
continue;
addGene(ret, current);
(*count)++;
}
}
return ret;
}
struct Bank *stepBank(struct Bank *current, char *end, struct Bank *passed, struct Bank *fullBank, int* count)
{
int index = 0;
struct Bank *ret = getBank(NULL, 0);
char *temp = NULL;
for (int i = 0; i < 8192; i++)
{
if (current->bitmap[i])
{
index = i << 3;
for (int j = 0; j < 8; j++)
{
if (current->bitmap[i] & (1 << j))
{
index += j;
temp = dehash(index);
merge(ret, step(temp, end, passed, fullBank, count));
free(temp);
}
}
}
}
return ret;
}
int minMutation(char * start, char * end, char ** bank, int bankSize){
struct Bank *fullBank = getBank(bank, bankSize), *passed = getBank(NULL, 0);
struct Bank *temp = getBank(NULL, 0), *temp2 = NULL;
addGene(temp, start);
int ret = 0, modified = 0;
while (!queryGene(temp, end))
{
modified = 0;
temp2 = stepBank(temp, end, passed, fullBank, &modified);
deleteBank(temp);
temp = temp2;
if (!modified)
return -1;
ret++;
}
return ret;
}