为了准备面试,开始啃《编程之美》这本书,卡在“最短摘要生成“这一题已经很多天了,决心自己把代码实现,今天终于初步调试通过,在这里做一个小结。
思路书上写的很清楚,可以概括为:
1.先将页面分词,得到分词后的数组W,假设关键词数组为K。
2.对数组W进行扫描,第一次得到包含所有关键词的数组段,其起点和终点分别为beg和end,记下此时数组段的长度length。将beg指向数组段中下一个关键词,end向后移,继续扫描数组W,直到包含所有的关键词,将此时数组段的长度与length进行比较,若比其短,则记录下数组段的起始和终止位置,继续重复上述过程,直到数组W的末尾。
设数组W长为N,关键词数组长为M,此方法的时间复杂度为O( N*M )。
代码:
#include <iostream>
#include <string>
#include <map>
using namespace std;
void shortest_abstract(const string arrW[], const string arrK[], int * aBeg, int * aEnd, int lenW, int lenK);
int main()
{
int lenW, lenK;
int aBeg = -1;
int aEnd = -1;
cout << "Please enter the length of words and keys: " << endl;
cin >> lenW >> lenK;
string * arrW = new string[lenW];
string * arrK = new string[lenK];
cout << "Please enter words: " << endl;
for(int i = 0; i < lenW; i++)
cin >> arrW[i];
cout << "Plearse enter keys: " << endl;
for(int i = 0; i < lenK; i++)
cin >> arrK[i];
shortest_abstract(arrW, arrK, &aBeg, &aEnd, lenW, lenK);
if(aBeg == aEnd)
{
cout << "Can't contain all words." << endl;
}
else
{
cout << "The shortest abstract is: " << endl;
for(int i = aBeg; i <= aEnd; i++)
{
cout << arrW[i] << " ";
}
cout << endl;
}
return 0;
}
void shortest_abstract(const string arrW[], const string arrK[], int * aBeg, int * aEnd, int lenW, int lenK)
{
int i,j;
map<int, int> pos_key;//store the key's index and its corresponding poistion
int * pos = new int[lenK];
int count = 0; // the number of keys have been found
int beg = 0;
int end = 0;
int length; // the length contains all key words
int aLength = lenW; // the length of abstract
cout << "lenW = " << lenW << " lenK = " << lenK << endl;
//initial position
for(i = 0; i < lenK; i++)
pos[i] = -1;
// begin finding the shortest abstract
for(i = 0; i < lenW; i++)
{
for(j = 0; j < lenK; j++)
{
if(arrW[i] == arrK[j])
{
if(pos[j] == -1)
{
if(count == 0)
beg = i;
count++;
cout << "count = " << count << endl;
// This is the first time that find this key
pos[j] = i; // record key's position in words array
pos_key.insert( pair<int,int>(i,j) );
if(count == lenK)
{
end = i;
length = end - beg;
if(aLength > length)
{
cout << "------" << endl;
*aBeg = beg;
*aEnd = end;
aLength = length;
}
map<int, int>::iterator it;
it = pos_key.begin();
beg = (*it).first;
pos[(*it).second] = -1;
it++; // move beg to next word
beg = (*it).first;
it--;
pos_key.erase(it);
count--;
}
}
else
{
map<int, int>::iterator it;
it = pos_key.find(pos[j]);
// if this key word is the same as the begin word of abstract, then move beg to next word
if(beg == (*it).first)
{
it++;
beg = (*it).first;
it--;
}
pos_key.erase(it);
pos_key.insert( pair<int, int>(i,j) );
pos[j] = i; //update position to make it as the newest position
}
break;
}
}
}
cout << "aBeg = " << *aBeg << " aEnd = " << *aEnd << endl;
}
没有评论:
发表评论