vendredi 8 mai 2015

C linux regex performance issue

I'm creating a program that reads file line by line, match that line to regex and diplay how many lines has matched that regex. The problem is, this program is using pretty great precentage of CPU. 67.5% without valgrind and with valgrind 100.1% and it's very slow ~5 seconds for 84000 lines. And valgrind outputs below(input file is 84000 lines long).

Why is it using so much cpu ?.Why is it taking so long?. Is there any way to make it faster and use less memory, cpu? Thank you.

==10737== HEAP SUMMARY:
==10737==     in use at exit: 0 bytes in 0 blocks
==10737==   total heap usage: 42,200,387 allocs, 42,200,387 frees, 5,441,088,516 bytes allocated
==10737== 
==10737== All heap blocks were freed -- no leaks are possible
==10737== 
==10737== For counts of detected and suppressed errors, rerun with: -v
==10737== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4)

Source code:

#include <stdio.h>
#include "stdlib.h"
#include <string.h>
#include <regex.h>

int check_regex(char* line);
void regex_test(const char* log_file);

int main(){
    regex_test("/var/log/syslog");
}

void regex_test(const char* log_file){
    printf("%s\n", log_file);
   FILE * fp;
   char * line = NULL;
   size_t len = 0;
   ssize_t read;
   int line_count=0;
   int match_count=0;
   fp = fopen(log_file, "r");
   if (fp == NULL)
       exit(EXIT_FAILURE);

   while ((read = getline(&line, &len, fp)) != -1) {
    // printf("%p\n", &line);
    if (check_regex(line))
    {
      match_count++;
    }else{
      printf("%s", line);
      printf("%d\n", line_count);
      // exit(0);
    }
    line_count++;
   }
   printf("%d/%d\n",match_count, line_count);
   fclose(fp);
   if (line)
       free(line);
}

int check_regex(char* line){
  regex_t regex;
  if (regcomp(&regex,"^(\\w+[ ]+[0-9]+ [0-9]+:[0-9]+:[0-9]+) [A-Za-z0-9-]+ [A-Za-z\\/]+\\[?[^]:]*\\]?: <?(\\w+)?>? ?(.+)$", REG_EXTENDED)) {
      printf("Could not compile regex\n");
      exit(1);
  }
  if (!regexec(&regex, line, 0, NULL, 0)) {
      // printf("Match\n");
      regfree(&regex);
      return 1;
  }
  else{
      // printf("No Match\n");
      regfree(&regex);
      return 0;
  }
}

Aucun commentaire:

Enregistrer un commentaire