📜  计算给定文法的 First 集和 Follow 集的程序

📅  最后修改于: 2021-05-28 05:05:13             🧑  作者: Mango

在继续之前,强烈建议您先熟悉语法分析和 LL(1) 解析的基础知识,以及计算文法 First 集和 Follow 集的规则。

  1. 语法分析导论
  2. 为什么需要 First 和 Follow?
  3. 语法分析中的 First 集
  4. 语法分析中的 Follow 集

假设读者已熟悉上述基础知识,下面讨论如何用 C 程序计算给定文法的 First 集和 Follow 集。

例子 :

Input :
E  -> TR
R  -> +T R| #
T  -> F Y
Y  -> *F Y | #
F  -> (E) | i


Output :
 First(E)= { (, i, }
 First(R)= { +, #, }
 First(T)= { (, i, }
 First(Y)= { *, #, }
 First(F)= { (, i, }

-----------------------------------------------

 Follow(E) = { $, ),  }
 Follow(R) = { $, ),  }
 Follow(T) = { +, $, ),  }
 Follow(Y) = { +, $, ),  }
 Follow(F) = { *, +, $, ),  }

函数 follow 和 followfirst 都用于计算给定非终结符的 Follow 集。开始符号的 Follow 集始终包含“$”。Follow 集的计算可分为以下三种情况:

  • 如果某个产生式右部(RHS)中的非终结符后面紧跟一个终结符,则可以直接将该终结符加入该非终结符的 Follow 集。
  • 如果某个产生式右部中的非终结符后面紧跟另一个非终结符,则将后者的 First 集加入前者的 Follow 集。如果该 First 集中含有 epsilon(即“#”),则继续处理产生式中的下一个符号。
    注意:“#”永远不会出现在任何非终结符的 Follow 集中。
  • 如果在计算 Follow 集时到达了产生式的末尾,则该非终结符的 Follow 集还要包含该产生式左部(LHS)非终结符的 Follow 集。这可以通过递归轻松实现。

假设:

  1. Epsilon 以“#”表示。
  2. 产生式的形式为 A=B,其中“A”是单个非终结符,“B”可以是终结符和非终结符的任意组合。
  3. 第一条产生式的左部(LHS)是开始符号。
  4. 文法(Grammar)不含左递归。
  5. 同一非终结符的每个产生式都单独占一行输入。
  6. 只有大写字母是非终结符,其他所有字符都是终结符。
  7. 不使用“!”和“$”,因为它们被保留作特殊用途。

解释 :
将文法存储在二维字符数组 production 中。findfirst 函数用于计算任意非终结符的 First 集。First 集的计算分为两种情况:

  • 如果产生式右部(RHS)的第一个符号是终结符,则可以直接将其加入 First 集。
  • 如果产生式右部的第一个符号是非终结符,则对该非终结符再次调用 findfirst 函数,用递归处理这种情况最为方便。同样,如果这个非终结符的 First 集包含 epsilon,则必须继续处理原产生式的下一个符号,该符号同样可能是终结符或非终结符。

注意:对于第二种情况,即使代码看起来没有问题,也很容易陷入无限循环。因此必须始终跟踪所有的函数调用,切勿再次发起相同的函数调用。

下面是实现:

// C program to calculate the First and
// Follow sets of a given grammar
#include <ctype.h>
#include <stdio.h>
#include <string.h>
  
// Capacity of the scratch buffers `first` and `f`.
// Entries are appended across all non-terminals and never reset, so the
// buffers must hold the running total: the bundled grammar alone appends
// 30 entries to `f`, which overflowed the previous size of 10.
enum { SET_BUF_SIZE = 256 };

// Functions to calculate Follow
void followfirst(char c, int c1, int c2);
void follow(char c);
  
// Function to calculate First
void findfirst(char c, int q1, int q2);
  
// count: number of production rules in use (set by main)
// n:     number of entries appended to `first` so far
int count, n = 0;
  
// Stores the final result 
// of the First Sets; column 0 of a row holds the
// non-terminal, unused cells hold '!'
char calc_first[10][100];
  
// Stores the final result
// of the Follow Sets; same row layout as calc_first
char calc_follow[10][100];

// Number of entries appended to `f` so far
int m = 0;
  
// Stores the production rules, one "A=rhs" string per row
char production[10][10];

// Scratch output buffers: `f` receives Follow symbols,
// `first` receives First symbols
char f[SET_BUF_SIZE], first[SET_BUF_SIZE];

// Loop index over productions used by main
int k;
// Non-terminal whose Follow set main is currently computing
char ck;
// Loop index over productions used by main's Follow pass
int e;
  
int main(int argc, char **argv)
{
    int jm = 0;
    int km = 0;
    int i, choice;
    char c, ch;
    count = 8;
      
    // The Input grammar
    strcpy(production[0], "E=TR");
    strcpy(production[1], "R=+TR");
    strcpy(production[2], "R=#");
    strcpy(production[3], "T=FY");
    strcpy(production[4], "Y=*FY");
    strcpy(production[5], "Y=#");
    strcpy(production[6], "F=(E)");
    strcpy(production[7], "F=i");
      
    int kay;
    char done[count];
    int ptr = -1;
      
    // Initializing the calc_first array
    for(k = 0; k < count; k++) {
        for(kay = 0; kay < 100; kay++) {
            calc_first[k][kay] = '!';
        }
    }
    int point1 = 0, point2, xxx;
      
    for(k = 0; k < count; k++)
    {
        c = production[k][0];
        point2 = 0;
        xxx = 0;
          
        // Checking if First of c has
        // already been calculated
        for(kay = 0; kay <= ptr; kay++)
            if(c == done[kay])
                xxx = 1;
                  
        if (xxx == 1)
            continue;
          
        // Function call    
        findfirst(c, 0, 0);
        ptr += 1;
          
        // Adding c to the calculated list
        done[ptr] = c;
        printf("\n First(%c) = { ", c);
        calc_first[point1][point2++] = c;
          
        // Printing the First Sets of the grammar
        for(i = 0 + jm; i < n; i++) {
            int lark = 0, chk = 0;
              
            for(lark = 0; lark < point2; lark++) {
                  
                if (first[i] == calc_first[point1][lark])
                {
                    chk = 1;
                    break;
                }
            }
            if(chk == 0)
            {
                printf("%c, ", first[i]);
                calc_first[point1][point2++] = first[i];
            }
        }
        printf("}\n");
        jm = n;
        point1++;
    }
    printf("\n");
    printf("-----------------------------------------------\n\n");
    char donee[count];
    ptr = -1;
      
    // Initializing the calc_follow array
    for(k = 0; k < count; k++) {
        for(kay = 0; kay < 100; kay++) {
            calc_follow[k][kay] = '!';
        }
    }
    point1 = 0;
    int land = 0;
    for(e = 0; e < count; e++)
    {
        ck = production[e][0];
        point2 = 0;
        xxx = 0;
          
        // Checking if Follow of ck
        // has alredy been calculated
        for(kay = 0; kay <= ptr; kay++)
            if(ck == donee[kay])
                xxx = 1;
                  
        if (xxx == 1)
            continue;
        land += 1;
          
        // Function call
        follow(ck);
        ptr += 1;
          
        // Adding ck to the calculated list
        donee[ptr] = ck;
        printf(" Follow(%c) = { ", ck);
        calc_follow[point1][point2++] = ck;
          
        // Printing the Follow Sets of the grammar
        for(i = 0 + km; i < m; i++) {
            int lark = 0, chk = 0;
            for(lark = 0; lark < point2; lark++) 
            {
                if (f[i] == calc_follow[point1][lark])
                {
                    chk = 1;
                    break;
                }
            }
            if(chk == 0)
            {
                printf("%c, ", f[i]);
                calc_follow[point1][point2++] = f[i];
            }
        }
        printf(" }\n\n");
        km = m;
        point1++; 
    }
}
  
void follow(char c)
{
    int i, j;
      
    // Adding "$" to the follow
    // set of the start symbol
    if(production[0][0] == c) {
        f[m++] = '$';
    }
    for(i = 0; i < 10; i++)
    {
        for(j = 2;j < 10; j++)
        {
            if(production[i][j] == c)
            {
                if(production[i][j+1] != '\0')
                {
                    // Calculate the first of the next
                    // Non-Terminal in the production
                    followfirst(production[i][j+1], i, (j+2));
                }
                  
                if(production[i][j+1]=='\0' && c!=production[i][0])
                {
                    // Calculate the follow of the Non-Terminal
                    // in the L.H.S. of the production
                    follow(production[i][0]);
                }
            } 
        }
    }
}
  
/*
 * Appends the symbols of First(c) to the global buffer `first`, advancing
 * `n`. Duplicates are not removed here.
 *
 * c:      the symbol to expand; any character that is not an uppercase
 *         letter is treated as a terminal and appended as-is.
 * q1, q2: row and column in `production` of the symbol that comes after
 *         `c` in the production being expanded. The pair (0, 0) marks a
 *         top-level call with no enclosing production.
 *
 * When `c` derives epsilon ('#') and a following symbol exists, the First
 * set of that following symbol is appended instead of '#'.
 */
void findfirst(char c, int q1, int q2)
{
    int j;
      
    // The case where we 
    // encounter a Terminal.
    // The cast keeps isupper() defined when plain char is signed.
    if(!isupper((unsigned char)c)) {
        first[n++] = c;
    }
    for(j = 0; j < count; j++)
    {
        if(production[j][0] != c)
            continue;

        // First symbol on the right-hand side of "c=..."
        const char head = production[j][2];

        if(head == '#')
        {
            const char next = production[q1][q2];

            if(next != '\0' && (q1 != 0 || q2 != 0))
            {
                // Recursion to calculate First of New
                // Non-Terminal we encounter after epsilon
                findfirst(next, q1, (q2+1));
            }
            else
            {
                // Nothing follows (or this is the top-level call),
                // so epsilon itself belongs to the set
                first[n++] = '#';
            }
        }
        else if(!isupper((unsigned char)head))
        {
            first[n++] = head;
        }
        else 
        {
            // Recursion to calculate First of
            // New Non-Terminal we encounter 
            // at the beginning
            findfirst(head, j, 3);
        }
    } 
}
  
void followfirst(char c, int c1, int c2)
{
    int k;
      
    // The case where we encounter
    // a Terminal
    if(!(isupper(c)))
        f[m++] = c;
    else
    {
        int i = 0, j = 1;
        for(i = 0; i < count; i++)
        {
            if(calc_first[i][0] == c)
                break;
        }
          
        //Including the First set of the
        // Non-Terminal in the Follow of
        // the original query
        while(calc_first[i][j] != '!')
        {
            if(calc_first[i][j] != '#') 
            {
                f[m++] = calc_first[i][j];
            }
            else
            {
                if(production[c1][c2] == '\0')
                {
                    // Case where we reach the
                    // end of a production
                    follow(production[c1][0]);
                }
                else
                {
                    // Recursion to the next symbol
                    // in case we encounter a "#"
                    followfirst(production[c1][c2], c1, c2+1);
                }
            }
            j++;
        }
    }
}

输出 :

First(E)= { (, i, }
 First(R)= { +, #, }
 First(T)= { (, i, }
 First(Y)= { *, #, }
 First(F)= { (, i, }

-----------------------------------------------

 Follow(E) = { $, ),  }
 Follow(R) = { $, ),  }
 Follow(T) = { +, $, ),  }
 Follow(Y) = { +, $, ),  }
 Follow(F) = { *, +, $, ),  }

想要从精选的最佳视频中学习和练习问题,请查看《基础知识到高级C的C基础课程》。