一个Python和C性能对比的好例子
me09
|
1#
me09 发表于 2006-06-20 16:55
一个Python和C性能对比的好例子VCKbase的论坛上有人提了一个问题: 遇到的问题是,我把E-mail地址收集在txt文件中(每行一个E-mail地址,共78000行) ,要求是把其中重复的E-mail地址全部去掉。考虑编程方便,我用了CString类,但似乎 效率不高。我写出来的整理78000份E-mail地址的需要近7分钟的时间 我给他用Python实现了一个(这个版本是修改过的,当时我随手实现的一个版本做错了,呵呵) 这里算法的主要内容就是利用hash_table来判断email是否已经重复出现,这比字符串的查找要快的多。 #remove duplicated email address from file import datetime if __name__ == "__main__": t = datetime.datetime(2000,1,1) print str(t.today()) hashtable = {} f = file("email.txt","r") f2 = file("email_new.txt","w") line = f.readline(); while len(line)>0: if not hashtable.has_key(line): hashtable[line] = 1 f2.write(line) line = f.readline(); f.close() f2.close() t2 = datetime.datetime(2000,1,1) print str(t2.today()) 上面的算法在我的机器上:P4 2.8G,512M 内存,耗时大约300ms,跟他的7分钟差了天了。 而我当时错误的算法,需要2分半钟,所以我贴到我们内部的BBS上,我的一个同学实现了如下算法(C): #include #include #include #include "list.h" #define HASH_SIZE 262157 #define LINE_MAX_LEN 1024 struct list_node { struct list_head head; int len; char *str; }; struct list_head hash_table[HASH_SIZE]; static inline int hash_string (const char *ptr, int len) { unsigned int hash = 0; for (hash = 0; len; len--, ptr++) { /* (31 * hash) will probably be optimized to ((hash len == len) && (memcmp(node->str, str, len) == 0)); } int main(int argc, char **argv) { int i, hash, len; struct list_node *node; char buf[LINE_MAX_LEN]; FILE *fp_in, *fp_out; if(argc != 3) { printf("error: \nplease use eml as:\n eml infile outfile\n"); return -1; } fp_in = fopen(argv[1], "r"); if(!fp_in) { printf("error: could not open infile %s\n", argv[1]); return -1; } fp_out = fopen(argv[2], "w"); if(!fp_out) { printf("error: could not open infile %s\n", argv[2]); return -1; } for(i=0; istr = (char *)malloc(len + 1); node->len = len; memcpy(node->str, buf, len); node->str[len] = '\0'; list_append(&(hash_table[hash]), node); } } return 0; } 还有.h文件: #ifndef _LINUX_LIST_H #define _LINUX_LIST_H extern inline void prefetch(const void *x) { return; } /* * Simple doubly linked list implementation. * * Some of the internal functions ("__xxx") are useful when * manipulating whole lists rather than single entries, as * sometimes we already know the next/prev entries and we can * generate better code by using them directly rather than * using the generic single-entry routines. */ struct list_head { struct list_head *next, *prev; }; #define LIST_HEAD_INIT(name) { &(name), &(name) } #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) #define INIT_LIST_HEAD(ptr) do { \ (ptr)->next = (ptr); (ptr)->prev = (ptr); \ } while (0) /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { next->prev = new; new->next = next; new->prev = prev; prev->next = new; } /** * list_add - add a new entry * @new: new entry to be added * @head: list head to add it after * * Insert a new entry after the specified head. * This is good for implementing stacks. */ static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } /** * list_add_tail - add a new entry * @new: new entry to be added * @head: list head to add it before * * Insert a new entry before the specified head. * This is useful for implementing queues. */ static inline void list_add_tail(struct list_head *new, struct list_head *head) { __list_add(new, head->prev, head); } /* * Delete a list entry by making the prev/next entries * point to each other. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_del(struct list_head *prev, struct list_head *next) { next->prev = prev; prev->next = next; } /** * list_del - deletes entry from list. * @entry: the element to delete from the list. * Note: list_empty on entry does not return true after this, the entry is in an undefined state. */ static inline void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); entry->next = (void *) 0; entry->prev = (void *) 0; } /** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. */ static inline void list_del_init(struct list_head *entry) { __list_del(entry->prev, entry->next); INIT_LIST_HEAD(entry); } /** * list_move - delete from one list and add as another's head * @list: the entry to move * @head: the head that will precede our entry */ static inline void list_move(struct list_head *list, struct list_head *head) { __list_del(list->prev, list->next); list_add(list, head); } /** * list_move_tail - delete from one list and add as another's tail * @list: the entry to move * @head: the head that will follow our entry */ static inline void list_move_tail(struct list_head *list, struct list_head *head) { __list_del(list->prev, list->next); list_add_tail(list, head); } /** * list_empty - tests whether a list is empty * @head: the list to test. */ static inline int list_empty(struct list_head *head) { return head->next == head; } static inline void __list_splice(struct list_head *list, struct list_head *head) { struct list_head *first = list->next; struct list_head *last = list->prev; struct list_head *at = head->next; first->prev = head; head->next = first; last->next = at; at->prev = last; } /** * list_splice - join two lists * @list: the new list to add. * @head: the place to add it in the first list. */ static inline void list_splice(struct list_head *list, struct list_head *head) { if (!list_empty(list)) __list_splice(list, head); } /** * list_splice_init - join two lists and reinitialise the emptied list. * @list: the new list to add. * @head: the place to add it in the first list. * * The list at @list is reinitialised */ static inline void list_splice_init(struct list_head *list, struct list_head *head) { if (!list_empty(list)) { __list_splice(list, head); INIT_LIST_HEAD(list); } } /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_struct within the struct. */ #define list_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) /** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop counter. * @head: the head for your list. */ #define list_for_each(pos, head) \ for (pos = (head)->next, prefetch(pos->next); pos != (head); \ pos = pos->next, prefetch(pos->next)) /** * list_for_each_safe - iterate over a list safe against removal of list entry * @pos: the &struct list_head to use as a loop counter. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. */ #define list_for_each_safe(pos, n, head) \ for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) /** * list_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop counter. * @head: the head for your list. * @member: the name of the list_struct within the struct. */ #define list_for_each_entry(pos, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member), \ prefetch(pos->member.next); \ &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member), \ prefetch(pos->member.next)) /*this is the begin of listhelp ;-)*/ #define LIST_FIND(head, cmpfn, type, args...) \ ({ \ const struct list_head *__i = (head); \ \ do { \ __i = __i->next; \ if (__i == (head)) { \ __i = NULL; \ break; \ } \ } while (!cmpfn((const type)__i , ## args)); \ (type)__i; \ }) #define LIST_FIND_W(head, cmpfn, type, args...) \ ({ \ const struct list_head *__i = (head); \ \ do { \ __i = __i->next; \ if (__i == (head)) { \ __i = NULL; \ break; \ } \ } while (!cmpfn((type)__i , ## args)); \ (type)__i; \ }) static inline int __list_cmp_same(const void *p1, const void *p2) { return p1 == p2; } /* Is this entry in the list? */ static inline int list_inlist(struct list_head *head, const void *entry) { return LIST_FIND(head, __list_cmp_same, void *, entry) !=NULL; } /* Delete from list. */ #ifdef CONFIG_NETFILTER_DEBUG #define LIST_DELETE(head, oldentry) \ do { \ if (!list_inlist(head, oldentry)) \ printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n", \ __FILE__, __LINE__, #oldentry, oldentry, #head); \ else list_del((struct list_head *)oldentry); \ } while(0) #else #define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry) #endif /* Append. */ static inline void list_append(struct list_head *head, void *new) { list_add((new), (head)->prev); } /* Prepend. */ static inline void list_prepend(struct list_head *head, void *new) { list_add(new, head); } /* Insert according to ordering function; insert before first true. */ #define LIST_INSERT(head, new, cmpfn) \ do { \ struct list_head *__i; \ for (__i = (head)->next; \ !cmpfn((new), (typeof (new))__i) && __i != (head); \ __i = __i->next); \ list_add((struct list_head *)(new), __i->prev); \ } while(0) /* If the field after the list_head is a nul-terminated string, you can use these functions. */ static inline int __list_cmp_name(const void *i, const char *name) { return strcmp(name, i + sizeof (struct list_head)) == 0; } static inline int __list_cmp_nameptr(const void *i, const char *name) { return strcmp(name, ((char *) *(unsigned long *) (i + sizeof (struct list_head)))) == 0; } /* Returns false if same name already in list, otherwise does insert. */ static inline int list_named_insert(struct list_head *head, void *new) { if (LIST_FIND(head, __list_cmp_name, void *, new + sizeof (struct list_head))) return 0; list_prepend(head, new); return 1; } static inline int list_nameptr_insert(struct list_head *head, void *new) { if (LIST_FIND(head, __list_cmp_nameptr, void *, new + sizeof (struct list_head))) return 0; list_prepend(head, new); return 1; } /* Find this named element in the list. */ #define list_named_find(head, name) \ LIST_FIND(head, __list_cmp_name, void *, name) #define list_nameptr_find(head, name) \ LIST_FIND(head, __list_cmp_nameptr, void *, name) #endif 在我的机器上测试(cygwin):耗时竟然是400ms! 当然,这里包含了Cygwin的间接调用开销。 很难想象吧,Python在这里的性能竟然跟C不相上下!而且Python的代码比C不知道短了多少倍! 所以讲,Python是蛮好用的,:-). 不过,这个例子其实是个特殊情况,以前我们玩过的另外一个程序,C的性能就比Python快得多。不过代码量还是Python简短得多。 有兴趣的朋友可以用自己熟悉的语言来测试一下,下面是测试文件(也是我用11行python代码产生的,有1/10的email是重复的): 这其实是一个rar,下载后请修改扩展名 |