模式识别c均值算法的实现

来源:百度文库 编辑:神马文学网 时间:2024/04/29 15:45:23
模式识别c均值算法的实现(C++实现)
实验目的:
实验原理:
实验内容:
写程序实现c均值算法,并用表中的三维数据进行测试,下面给出了每种测试的类别数目和初始值。
的结果与(3)中的结果进行比较,并解释差别,包括迭代次数的差别。

 
 


 
实验代码:
CCMean(CData *pdata,CData *pmean);
void work(int InitClassNum);
void CalcuMean( int i );//计算第i类的均值
计算第i类的误差
初始化分类
将第i类样本移动到第k类中,如果返回true,则总误差变小,否则不移动
bool MoveItoK( const CData& da, int i, int &k );
double  dist( const CData& mean, const CData& da);
指针指向样本数据地址
指针指向初始化分类重心数据地址
各样本的误差均方根
list< CData >* pcla[DATANUM];
CCMean::CCMean(CData *pdata)
for(int i = 0; i < DATANUM; i ++ )
pcla[i] = new list< CData >;
assert( pcla[i] != 0 );
CCMean::CCMean(CData *pdata,CData *pmean)
for(int i = 0; i < DATANUM; i ++ )
pcla[i] = new list< CData >;
assert( pcla[i] != 0 );
for(int i = 0; i < DATANUM; i ++ )
for(int i = 0; i < DATANUM; i ++ )
void CCMean::CalcuMean(int ii)
double sum1 = 0.0, sum2 = 0.0,sum3 = 0.0;
int si = (int)pcla[ii]->size();
list< CData >::iterator iter = pcla[ii]->begin();
for(int i = 0; i < si; i ++ )
sum3 += iter->x3;
mean[ii].x1 = (double)sum1 / si;
mean[ii].x2 = (double)sum2 / si;
mean[ii].x3 = (double)sum3 / si;
for( int i = 0; i < iClassNum ; i ++ )
void CCMean::CalcuJc( int index )
list< CData >::iterator iter = pcla[index]->begin();
int si = (int)pcla[index]->size();
for( int i = 0; i < si; i ++)
jc[index] += dist( mean[index], *iter );
double CCMean::dist(const CData& mean, const CData& da)
return (mean.x1 - da.x1)*(mean.x1 - da.x1) + (mean.x2 - da.x2)*(mean.x2 - da.x2) + (mean.x3 - da.x3)*(mean.x3 - da.x3);
CData *pmean = pMean;
for( int ii = 0; ii < iClassNum; ii ++ )
初始化类别重心数组
如果是没有给定初始化的分类重心,可以加上下面的这段代码
//    for( int i = 0; i < iClassNum; i ++ )
//       pcla[i]->push_back( *ptem );
for( int i = 0; i < DATANUM; i ++ )
double mindis = MAXDIST;
for( int j = 0; j < iClassNum; j ++ )
double curdis = dist( pData[i], mean[j] );
if( curdis < mindis )
mindis = curdis;
pcla[pos]->push_back( pData[i] );
for( int j = 0; j < iClassNum ; j ++ )
bool CCMean::MoveItoK( const CData &da, int i , int& k )
for( int j = 0; j < iClassNum; j ++ )
int si = (int)pcla[j]->size();
Pj = dist( mean[j], da ) * si/(si - 1);
Pj = dist( mean[j], da ) * si/(si + 1);
else if ( Pj == Pk  && j == i )
当 Pj == Pk && j == i, 移动
pcla[k]->push_back( da );
从第i类中删除da,但是首先从链表中找到他的位置
list< CData >::iterator iter = pcla[i]->begin();
while( iter != pcla[i]->end() )
if( iter->x1 == da.x1 && iter->x2 == da.x2 && iter->x3 == da.x3 )
pcla[i]->erase( iter );
for( int i = 0; i < iClassNum ; i ++ )
类别
重心点为: ("<list< CData >::iterator iter = pcla[i]->begin();
while( iter != pcla[i]->end() )
cout<<"("<x1<<","<x2<<","<x3<<")   "<if( j++ % 5 == 0)
cout<void CCMean::work(int InitClassNum)
iClassNum = InitClassNum;
用来判断迭代是否停止
for( int i = 0; i < iClassNum ; i ++ )
int si = (int)pcla[i]->size();
list< CData >::iterator iter = pcla[i]->begin();
for(int j = 0; j < (int)pcla[i]->size(); j++)
CData da = *iter;
if( MoveItoK( da , i, k ) == true )
double OldJe = je;
CalcuMean( i );
CalcuMean( k );
if( OldJe > je )
count++;
counter = 0;
goto Again;
if( counter == DATANUM )
最后总误差 Je 为
迭代次数是:"<{-7.82,-4.58,-3.97},{-6.68,3.16,2.71},{4.36,-2.19,2.09},{6.72,0.88,2.80},{-8.64,3.06,3.50},
{-6.87,0.57,-5.45},{4.47,-2.62,5.76},{6.73,-2.01,4.18},{-7.71,2.34,-6.33},{-6.91,-0.49,-5.68},
{6.18,2.81,5.82},{6.72,-0.93,-4.04},{-6.25,-0.26,0.56},{-6.94,-1.22,1.13},{8.09,0.20,2.25},
{6.18,0.17,-4.15},{-5.19,4.24,4.04},{-6.38,-1.74,1.43},{4.08,1.30,5.33},{6.27,0.93,-2.78}
// Candidate initial class centres, one brace-enclosed triple per class.
// Presumably each triple is {x1, x2, x3}; confirm against the CData declaration.
// m11, m12, m21 and m22 are passed as the second argument when constructing CCMean objects in main.

// Two initial centres each.
CData m11[2] = {{1,1,1},{-1,1,-1}};
CData m12[2] = {{0,0,0},{1,1,-1}};
// Three initial centres each.
CData m21[3] = {{0,0,0},{-1,1,-1},{-1,0,2}};
CData m22[3] = {{-0.1,0,0.1},{0,-0.1,0.1},{-0.1,-0.1,0.1}};
// Four initial centres. NOTE(review): no use of m31 is visible in this listing; confirm it is exercised.
CData m31[4] = {{-0.1,0,0.1},{0,-0.1,0.1},{-0.1,-0.1,0.1},{0.2,1,0}};
int main(int argc, char* argv[])
测试
CCMean cmean11( yy,m11 );
测试
CCMean cmean12( yy,m12 );
测试
CCMean cmean21( yy,m21 );
测试
CCMean cmean22( yy,m22 );
实验结果:
重心点为
重心点为
重心点为: (-6.83667,3.48667,3.41667)
重心点为: (-6.98286,-0.768571,-2.61571)
结果分析:初始的类别重心选择越与实际的重心接近,则迭代次数越少;划分的类别越多,则迭代的次数也越少;当分成N类时,迭代次数是零。