帧内预测是视频编码中非常重要的一环，H.264的帧内预测包括亮度预测和色度预测，其中亮度预测有四种类型：Intra_4x4，Intra_8x8，Intra_16x16和I_PCM。这里主要讲解Intra_4x4和Intra_16x16。需要注意的是用于帧内预测的相邻像素必须是在做去块滤波之前。同时，所有预测函数的调用实际上都是在模式决策这一环进行的，模式决策是个比较复杂且耗时的过程，有机会再仔细研究。

Intra_4x4

下图是Intra_4x4预测的示意图，其中小写字母a-p是当前要进行帧内预测的4x4块，而大写字母则是预测时需要用到的13个相邻像素，且它们均是编码后重新解码得到的像素值(即并非是原始像素值)。这些像素的可用性由上一篇文章说到的get4x4Neighbour()函数得到，额外需要注意的几点是：

如果相邻4x4块当前还未编码则不可用(按照标准的编码顺序会出现这种情况)。
当UseConstrainedIntraPred的值为真时帧间编码的像素也不能用于进行帧内预测。
若E-H像素不可用，可以使用D的值代替这几个像素的值。

帧内预测
Intra_4x4共有9种预测模式，以0-8表示，分别对应了不同的方向，如图所示。这些预测模式所利用到的相邻像素有所不同，若某个模式需要的相邻像素不可用，则表明该预测模式不可用，这种情况在第一行/列宏块中尤为严重，需要注意。这些预测模式对像素值的计算方法均是相邻像素的线性组合，具体公式可以参考标准，此处不做赘述。编码时会从可用的模式中选取最佳的预测模式，编码最佳预测模式Mode的方法如下：

先根据标准中的方法从左邻块和上邻块的帧内预测模式估计出一个最可能的预测模式MPM。
若MPM=Mode，则直接编码一个1作为标志位即可。
若MPM!=Mode，则先编码一个0作为标志位，然后编码固定的三位来表示最佳预测模式。
固定三位的编码方法是：若MPM>Mode，则编码结果为Mode所代表的序号的二进制表示(由前提条件知道Mode<MPM≤8，故其值最大不超过7，三位即可表示)；若MPM<Mode，则编码结果为Mode-1所代表的序号的二进制表示(由前提条件知道其值最大可能为8-1=7，最小为1-1=0，仍然可以用三位表示)。

/*
 * Intra 4x4 mode decision for a single 4x4 luma block.
 *
 * Loops over the candidate Intra 4x4 prediction modes, evaluates the
 * rate-distortion cost of each usable one, and keeps the cheapest mode
 * together with its coefficients and reconstruction.
 *
 * currMB   - macroblock being coded.
 * b8, b4   - block indices; combined as 4*b8+b4 to index
 *            currMB->intra_pred_modes and forwarded to the RD-cost routine
 *            (presumably 8x8-block index and 4x4 sub-block index - confirm).
 * lambda   - forwarded to the RD-cost routine.
 * min_cost - output; initialised to DISTBLK_MAX and overwritten with the
 *            RD cost of the best mode found.
 *
 * NOTE: this excerpt omits the local declarations/initialisation and the
 * tail of the function (including its return statement).
 */
int mode_decision_for_I4x4_blocks_JM_High (Macroblock *currMB, int  b8,  int  b4,  int  lambda,  distblk*  min_cost)
{
  
  // (a large block of variable declarations and assignments is omitted here)

  // Locate the left and upper neighbouring 4x4 blocks
  get4x4Neighbour(currMB, block_x - 1, block_y    , mb_size, &left_block);
  get4x4Neighbour(currMB, block_x,     block_y - 1, mb_size, &top_block );

  // constrained intra pred: a neighbour only stays available if its
  // macroblock is flagged in p_Vid->intra_block
  if (p_Inp->UseConstrainedIntraPred)
  {
    left_block.available = left_block.available ? p_Vid->intra_block[left_block.mb_addr] : 0;
    top_block.available  = top_block.available  ? p_Vid->intra_block[top_block.mb_addr]  : 0;
  }

  // Fetch the intra prediction modes of the upper and left neighbours (-1 when unavailable)
  upMode            =  top_block.available ? p_Vid->ipredmode[top_block.pos_y ][top_block.pos_x ] : (char) -1;
  leftMode          = left_block.available ? p_Vid->ipredmode[left_block.pos_y][left_block.pos_x] : (char) -1;
  
  // Most probable mode: DC_PRED if either neighbour mode is negative (unavailable),
  // otherwise the smaller of the two neighbour modes
  mostProbableMode  = (upMode < 0 || leftMode < 0) ? DC_PRED : upMode < leftMode ? upMode : leftMode;
  *min_cost = DISTBLK_MAX;

  currMB->ipmode_DPCM = NO_INTRA_PMODE; ////For residual DPCM

  //===== INTRA PREDICTION FOR 4x4 BLOCK =====
  // set intra prediction values for 4x4 intra prediction
  // (determines availability of the 13 neighbouring pixels and fills them in)
  currSlice->set_intrapred_4x4(currMB, PLANE_Y, pic_pix_x, pic_pix_y, &left_available, &up_available, &all_available);  

  //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES ===== (iterate over the 9 modes)
  for (ipmode = 0; ipmode < NO_INTRA_PMODE; ipmode++)
  {
    // A mode is usable when all neighbours are available, when it is DC, or
    // when the particular neighbours it depends on (up / left) are available
    available_mode =  (all_available) || (ipmode==DC_PRED) ||
      (up_available && (ipmode==VERT_PRED||ipmode==VERT_LEFT_PRED||ipmode==DIAG_DOWN_LEFT_PRED)) ||
      (left_available && (ipmode==HOR_PRED||ipmode==HOR_UP_PRED));

    // Skip modes rejected by valid_intra_mode()
    if (valid_intra_mode(currSlice, ipmode) == 0)
      continue;
  
    // Only evaluate the mode if it is usable
    if( available_mode)
    {
      // generate intra 4x4 prediction block given availability (predict with the current mode)
      get_intrapred_4x4(currMB, PLANE_Y, ipmode, block_x, block_y, left_available, up_available);

      // get prediction and prediction error (difference between source and predicted pixels)
      generate_pred_error_4x4(&p_Vid->pCurImg[pic_opix_y], currSlice->mpr_4x4[0][ipmode], &currSlice->mb_pred[0][block_y], &currSlice->mb_ores[0][block_y], pic_opix_x, block_x);

      // get and check rate-distortion cost
#ifdef BEST_NZ_COEFF
      currMB->cbp_bits[0] = cbp_bits;
#endif

      // Compute the RD cost via rdcost_for_4x4_intra_blocks(); per the original note this
      // routine performs transform, quantisation, inverse quantisation, inverse transform,
      // reconstruction and entropy coding to obtain the cost
      rdcost = currSlice->rdcost_for_4x4_intra_blocks (currMB, &c_nz, b8, b4, ipmode, lambda, mostProbableMode, min_rdcost);
      // Keep this mode if it is strictly cheaper, or if it ties and equals the most probable mode
      if ((rdcost < min_rdcost) || (rdcost == min_rdcost && ipmode == mostProbableMode))
      {
        //--- set coefficients ---
        memcpy(p_RDO->cofAC4x4[0], ACLevel, 18 * sizeof(int));
        memcpy(p_RDO->cofAC4x4[1], ACRun,   18 * sizeof(int));

        //--- set reconstruction ---
        copy_4x4block(p_RDO->rec4x4[PLANE_Y], &p_Vid->enc_picture->imgY[pic_pix_y], 0, pic_pix_x);

        // SP/SI reconstruction
        if(currSlice->slice_type == SP_SLICE && !currSlice->sp2_frame_indicator)
        {
          for (y=0; y<4; y++)
          {
            memcpy(lrec4x4[y],&p_Vid->lrec[pic_pix_y+y][pic_pix_x], BLOCK_SIZE * sizeof(int));// stores the mode coefficients
          }
        }

        //--- flag if transform-coefficients must be coded ---
        nonzero = c_nz;

        //--- set best mode update minimum cost ---
        *min_cost     = rdcost;
        min_rdcost    = rdcost;
        best_ipmode   = ipmode;

        best_nz_coeff = p_Vid->nz_coeff [currMB->mbAddrX][block_x4][block_y4];
#ifdef BEST_NZ_COEFF
        best_coded_block_flag = (int)((currMB->cbp_bits[0] >> bit_pos)&(int64)(1));
#endif
        if (p_Vid->AdaptiveRounding)
        {
          store_adaptive_rounding_4x4 (p_Vid, p_Vid->ARCofAdj4x4, I4MB, block_y, block_x);
        }
      }
    }
  }
#if INTRA_RDCOSTCALC_NNZ
  p_Vid->nz_coeff [currMB->mbAddrX][block_x4][block_y4] = best_nz_coeff;
#endif
#ifdef BEST_NZ_COEFF
  cbp_bits &= (~(int64)(1<<bit_pos));
  cbp_bits |= (int64)(best_coded_block_flag<<bit_pos);
#endif
  
  /* After looping over all prediction modes, commit the best mode and its associated data */
  //===== set intra mode prediction =====
  p_Vid->ipredmode[pic_block_y][pic_block_x] = (char) best_ipmode;
  // Stored value: -1 when the best mode equals the most probable mode; otherwise the
  // mode itself if it is below the most probable mode, or the mode minus one if above
  currMB->intra_pred_modes[4*b8+b4] =
    (char) (mostProbableMode == best_ipmode ? -1 : (best_ipmode < mostProbableMode ? best_ipmode : best_ipmode-1)); 

  //===== restore coefficients =====
  memcpy (ACLevel, p_RDO->cofAC4x4[0], 18 * sizeof(int));
  memcpy (ACRun,   p_RDO->cofAC4x4[1], 18 * sizeof(int));

  //===== restore reconstruction and prediction (needed if single coeffs are removed) =====
  copy_4x4block(&p_Vid->enc_picture->imgY[pic_pix_y], p_RDO->rec4x4[PLANE_Y], pic_pix_x, 0);
  copy_4x4block(&currSlice->mb_pred[0][block_y], currSlice->mpr_4x4[0][best_ipmode], block_x, 0);

  // (remainder omitted)

}

该函数在rd_intra_jm.c中，是在模式决策时进行调用的，包含了完整Intra_4x4预测模式的计算与决策。

/*
 * Determine which neighbours of a 4x4 block can be used for intra prediction
 * and load the neighbouring pixels into currMB->intra4x4_pred[pl].
 *
 * currMB          - macroblock containing the block.
 * pl              - colour plane selecting the predictor array.
 * img_x, img_y    - picture coordinates of the block; only the low four bits
 *                   (offset inside the macroblock, 0..15) are used.
 * left_available  - out: left neighbour usable.
 * up_available    - out: upper neighbour usable.
 * all_available   - out: left, upper and upper-left neighbours all usable.
 *
 * Unusable neighbours are replaced by p_Vid->dc_pred_value, except the
 * upper-right pixels (E..H), which fall back to the value of P_D.
 */
void set_intrapred_4x4(Macroblock *currMB, ColorPlane pl, int img_x,int img_y, int *left_available, int *up_available, int *all_available)
{
  VideoParameters *p_Vid = currMB->p_Vid;
  InputParameters *p_Inp = currMB->p_Inp;

  imgpel  *PredPel = currMB->intra4x4_pred[pl];  // predictor array addressed by the P_* macros
  imgpel **rec     = p_Vid->enc_picture->p_curr_img;
  int     *mb_size = p_Vid->mb_size[IS_LUMA];

  // Offset of the block inside its macroblock (each in 0..15)
  const int mb_x = img_x & 15;
  const int mb_y = img_y & 15;

  PixelPos left_pos, up_pos, up_right_pos, up_left_pos;
  int have_left, have_up, have_up_right, have_up_left;

  // Look up the four neighbouring positions: left, up, up-right, up-left
  p_Vid->getNeighbour(currMB, mb_x - 1, mb_y    , mb_size, &left_pos);
  p_Vid->getNeighbour(currMB, mb_x    , mb_y - 1, mb_size, &up_pos);
  p_Vid->getNeighbour(currMB, mb_x + 4, mb_y - 1, mb_size, &up_right_pos);
  p_Vid->getNeighbour(currMB, mb_x - 1, mb_y - 1, mb_size, &up_left_pos);

  have_left    = left_pos.available;
  have_up      = up_pos.available;
  have_up_left = up_left_pos.available;

  // Per the original note: because of coding order, the up-right neighbour of
  // the blocks at offset (4,4) and (4,12) has not been coded yet, so it is
  // forced unavailable; (12,4) and (12,12) are said to be covered already by
  // the neighbour lookup since their up-right block lies in another macroblock.
  have_up_right = up_right_pos.available && !((mb_x == 4) && ((mb_y == 4) || (mb_y == 12)));

  // With constrained intra prediction, an available neighbour additionally
  // takes its availability from the intra_block flag of its macroblock.
  if (p_Inp->UseConstrainedIntraPred)
  {
    if (have_left)
      have_left = p_Vid->intra_block [left_pos.mb_addr];
    if (have_up)
      have_up = p_Vid->intra_block [up_pos.mb_addr];
    if (have_up_right)
      have_up_right = p_Vid->intra_block [up_right_pos.mb_addr];
    if (have_up_left)
      have_up_left = p_Vid->intra_block [up_left_pos.mb_addr];
  }

  // Report availability to the caller
  *left_available = have_left;
  *up_available   = have_up;
  *all_available  = have_up && have_left && have_up_left;

  // Upper row: four pixels copied into PredPel[1..4], or the DC default
  if (!have_up)
  {
    P_A = P_B = P_C = P_D = (imgpel) p_Vid->dc_pred_value;
  }
  else
  {
    memcpy(&PredPel[1], &rec[up_pos.pos_y][up_pos.pos_x], BLOCK_SIZE * sizeof(imgpel));
  }

  // Upper-right row: four pixels copied into PredPel[5..8], or replicated from P_D
  if (!have_up_right)
  {
    P_E = P_F = P_G = P_H = P_D;
  }
  else
  {
    memcpy(&PredPel[5], &rec[up_right_pos.pos_y][up_right_pos.pos_x], BLOCK_SIZE * sizeof(imgpel));
  }

  // Left column: four vertically consecutive pixels, or the DC default
  if (!have_left)
  {
    P_I = P_J = P_K = P_L = p_Vid->dc_pred_value;
  }
  else
  {
    const int col = left_pos.pos_x;
    const int row = left_pos.pos_y;
    P_I = rec[row    ][col];
    P_J = rec[row + 1][col];
    P_K = rec[row + 2][col];
    P_L = rec[row + 3][col];
  }

  // Upper-left corner pixel, or the DC default
  if (!have_up_left)
  {
    P_X = p_Vid->dc_pred_value;
  }
  else
  {
    P_X = rec[up_left_pos.pos_y][up_left_pos.pos_x];
  }
}

该函数在intra4x4.c中，用于判断相邻4x4块像素的可用性并赋值。

/*
 * Generate the 4x4 intra prediction for one mode by dispatching to the
 * matching get_i4x4_* routine.
 *
 * currMB          - macroblock supplying the neighbour pixel array and slice.
 * pl              - colour plane selecting the predictor and output arrays.
 * i4x4_mode       - prediction mode to generate.
 * img_x, img_y    - not used by this function.
 * left_available,
 * up_available    - neighbour availability, forwarded to the DC predictor only.
 *
 * The result is written to currMB->p_Slice->mpr_4x4[pl][mode]. An
 * unrecognised mode prints a message and produces no prediction.
 */
void get_intrapred_4x4(Macroblock *currMB, ColorPlane pl, int i4x4_mode, int img_x, int img_y, int left_available, int up_available)
{
  imgpel   *neighbours = currMB->intra4x4_pred[pl];      // neighbouring predictor pixels
  imgpel ***pred_out   = currMB->p_Slice->mpr_4x4[pl];   // one output block per mode

  // Note that currently prediction values are always placed starting from (0,0) and not according to block position.
  switch (i4x4_mode)
  {
  case VERT_PRED:
    get_i4x4_vertical(pred_out[VERT_PRED], neighbours);
    break;

  case HOR_PRED:
    get_i4x4_horizontal(pred_out[HOR_PRED], neighbours);
    break;

  case DC_PRED:
    get_i4x4_dc(pred_out[DC_PRED], neighbours, left_available, up_available);
    break;

  case DIAG_DOWN_LEFT_PRED:
    get_i4x4_downleft(pred_out[DIAG_DOWN_LEFT_PRED], neighbours);
    break;

  case DIAG_DOWN_RIGHT_PRED:
    get_i4x4_downright(pred_out[DIAG_DOWN_RIGHT_PRED], neighbours);
    break;

  case VERT_RIGHT_PRED:
    get_i4x4_vertright(pred_out[VERT_RIGHT_PRED], neighbours);
    break;

  case HOR_DOWN_PRED:
    get_i4x4_hordown(pred_out[HOR_DOWN_PRED], neighbours);
    break;

  case VERT_LEFT_PRED:
    get_i4x4_vertleft(pred_out[VERT_LEFT_PRED], neighbours);
    break;

  case HOR_UP_PRED:
    get_i4x4_horup(pred_out[HOR_UP_PRED], neighbours);
    break;

  default:
    printf("invalid prediction mode \n");
    break;
  }
}

该函数在intra4x4.c中，用于调用不同预测模式的计算方法。

/*
 * Store the prediction for a 4x4 block and compute its prediction error.
 *
 * cur_img    - source picture rows (row 0 is the block's first row).
 * prd_img    - predicted block rows; columns start at 0.
 * cur_prd    - destination rows that receive a copy of the prediction,
 *              written at column block_x.
 * m7         - destination rows that receive (source - prediction),
 *              written at column block_x.
 * pic_opix_x - column of the block inside cur_img.
 * block_x    - column of the block inside cur_prd and m7.
 */
void generate_pred_error_4x4(imgpel **cur_img, imgpel **prd_img, imgpel **cur_prd, int **m7, int pic_opix_x, int block_x)
{
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const imgpel *src   = cur_img[row] + pic_opix_x;  // source pixels
    const imgpel *pred  = prd_img[row];               // predicted pixels
    int          *resid = m7[row] + block_x;          // residual output

    // Keep a copy of the prediction row
    memcpy(cur_prd[row] + block_x, pred, BLOCK_SIZE * sizeof(imgpel));

    // Residual = source - prediction
    for (col = 0; col < BLOCK_SIZE; col++)
    {
      resid[col] = (int) (src[col] - pred[col]);
    }
  }
}

该函数在intra4x4.c中，用于计算像素的预测误差，用于后续编码。

Intra_16x16

Intra_16x16是对整个宏块进行预测，因此相对简单，共有四种预测模式，与Intra_4x4有些相似，用到了上方宏块的16个像素和左方宏块的16个像素，详见标准。其预测模式的编码比较特别，并非是单独编码，而是与CBP元素一起联合编码的，具体可参考标准的表格。

/*
 * Intra 16x16 mode decision for a macroblock: pick the best prediction mode,
 * then run the 16x16 luma residual routine and return its result.
 *
 * currMB - macroblock being coded.
 * lambda - forwarded to find_best_mode_I16x16_MB().
 */
int mode_decision_for_I16x16_MB (Macroblock* currMB, int lambda)
{
  int result;

  // Select the best 16x16 prediction mode; the returned cost is not used here
  (void) find_best_mode_I16x16_MB (currMB, lambda, DISTBLK_MAX);

  // Per the original note: transform, quantisation, inverse quantisation,
  // inverse transform, reconstruction and entropy coding
  result = currMB->residual_transform_quant_luma_16x16 (currMB, PLANE_Y);
  return result;
}

/*
 * Find the best Intra 16x16 mode for a macroblock.
 *
 * Clears luma_transform_size_8x8_flag and delegates to the slice's
 * find_sad_16x16 callback (per the original note, usually
 * find_sad_16x16_JM), returning its result cast to int.
 * lambda and min_cost are accepted but not used here.
 */
int find_best_mode_I16x16_MB (Macroblock *currMB, int lambda,  distblk min_cost)
{
  currMB->luma_transform_size_8x8_flag = FALSE;

  return (int) currMB->p_Slice->find_sad_16x16 (currMB);
}

以上两个函数位于rd_intra_jm.c中，是在模式决策时进行调用的，流程和Intra4x4也是一样的。

/*
 * Choose the Intra 16x16 prediction mode with the lowest distortion.
 *
 * Iterates from VERT_PRED_16 to PLANE_16, skips modes that are disabled by
 * input parameters or whose neighbours are unavailable, measures each
 * remaining mode with the slice's distI16x16 callback and stores the winner
 * in currMB->i16mode (which starts out as DC_PRED_16).
 *
 * Returns the distortion of the best mode, or DISTBLK_MAX if no mode was
 * evaluated with a lower value.
 */
distblk find_sad_16x16_JM(Macroblock *currMB)
{
  Slice *currSlice = currMB->p_Slice;
  VideoParameters *p_Vid = currMB->p_Vid;
  InputParameters *p_Inp = currMB->p_Inp;
  imgpel ***luma_pred = currSlice->mpr_16x16[0];
  distblk best_cost = DISTBLK_MAX;
  int has_up, has_left, has_up_left;
  int mode;

  currMB->i16mode = DC_PRED_16;

  // Determine neighbour availability and load the neighbouring pixels
  currSlice->set_intrapred_16x16(currMB, PLANE_Y, &has_left, &has_up, &has_up_left);
  // For speed purposes, we should just unify all planes
  if (currSlice->P444_joined)
  {
    currSlice->set_intrapred_16x16(currMB, PLANE_U, &has_left, &has_up, &has_up_left);
    currSlice->set_intrapred_16x16(currMB, PLANE_V, &has_left, &has_up, &has_up_left);
  }

  // Try each of the four 16x16 modes
  for (mode = VERT_PRED_16; mode <= PLANE_16; mode++)
  {
    distblk cost;

    // The disable switches apply when IntraDisableInterOnly is 0, or when
    // the slice is neither an I slice nor an SI slice
    if (p_Inp->IntraDisableInterOnly == 0 || (currSlice->slice_type != I_SLICE && currSlice->slice_type != SI_SLICE) )
    {
      if (p_Inp->Intra16x16ParDisable && (mode == VERT_PRED_16 || mode == HOR_PRED_16))
        continue;

      if (p_Inp->Intra16x16PlaneDisable && mode == PLANE_16)
        continue;
    }

    // check if there are neighbours to predict from
    if (mode == VERT_PRED_16 && !has_up)
      continue;
    if (mode == HOR_PRED_16 && !has_left)
      continue;
    if (mode == PLANE_16 && (!has_left || !has_up || !has_up_left))
      continue;

    // Build the prediction for this mode and measure it against the source
    get_intrapred_16x16(currMB, PLANE_Y, mode, has_left, has_up);

    // Per the original note, distI16x16 usually points at distI16x16_satd,
    // i.e. the measure is a SATD rather than a plain SAD
    cost = currSlice->distI16x16(currMB, p_Vid->pCurImg, luma_pred[mode], best_cost);
    if (currSlice->P444_joined)
    {
      get_intrapred_16x16(currMB, PLANE_U, mode, has_left, has_up);
      cost += currSlice->distI16x16(currMB, p_Vid->pImgOrg[1], currSlice->mpr_16x16[1][mode], best_cost);
      get_intrapred_16x16(currMB, PLANE_V, mode, has_left, has_up);
      cost += currSlice->distI16x16(currMB, p_Vid->pImgOrg[2], currSlice->mpr_16x16[2][mode], best_cost);
    }

    // Keep the mode with the strictly smallest cost
    if (cost < best_cost)
    {
      best_cost = cost;
      currMB->i16mode = (char) mode; // update best intra mode
    }
  }

  return best_cost;
}

该函数在intra16x16.c中，与Intra4x4的相应函数相似，但这里主要用的是SAD作为最佳模式的衡量值（当然，如果有特别设置的情况下也是可以使用RDO作为度量的），但实际计算的一般是SATD，这点没有深究。