[go: up one dir, main page]

File: dedupe.c

package info (click to toggle)
suck 3.10.1-3
  • links: PTS
  • area: main
  • in suites: slink
  • size: 728 kB
  • ctags: 727
  • sloc: ansic: 8,357; sh: 585; makefile: 212; java: 144; perl: 24
file content (125 lines) | stat: -rw-r--r-- 3,565 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <config.h>

#include <stdio.h>
#include <stdlib.h>

#include "suck_config.h"
#include "suck.h"
#include "both.h"
#include "dedupe.h"
#include "suckutils.h"
#include "phrases.h"
#include "timer.h"

/* this is almost an exact duplicate of the chkhistory() routine */
/*-------------------------------------------------------------------------------------------------------*/
/* We'll build an array of pointers to the linked list, based on a character of the article id		 */
/* then we only have to check the article ids that match the character, saving searching thru the list	 */
/*-------------------------------------------------------------------------------------------------------*/

/* Number of buckets in the per-character index built by dedupe_list().  The selected  */
/* character is reduced modulo this value, and it also sizes the per-bucket arrays.    */
#define NR_LETTERS 128		/* nr of chars indexed, only 128 since rfc doesn't support > 128 */
/* Zero-based offset, within a node's msgnr string, of the character that picks the bucket */
#define LETTER_TO_CHECK	2	/* which letter to check in MSG_ID */

/*-------------------------------------------------------------------------------------------------------*/
/* dedupe_list() - remove nodes with duplicate message-ids from the list hanging off master->head	 */
/*													 */
/* master : list owner; reads ->head, ->msgs and ->debug, updates ->head and ->nritems			 */
/*													 */
/* Nodes are grouped into NR_LETTERS buckets keyed on character LETTER_TO_CHECK of their msgnr, so	 */
/* each node is only compared (via cmp_msgid()) against the nodes sharing its bucket.  When two nodes	 */
/* compare equal, the one reached later by the bucket scan is flagged by blanking msgnr[0] and is	 */
/* unlinked and released with free_one_node() in the final pass.					 */
/*													 */
/* Nothing is returned.  If the index array can't be allocated, dedupe_phrases[1] is logged and the	 */
/* list is left untouched.  Start/summary messages and the elapsed time go to master->msgs.		 */
/*-------------------------------------------------------------------------------------------------------*/
void dedupe_list(PMaster master) {

	int j, nrfound = 0;
	long i, x, total = 0;	/* slot offsets are longs, so keep the indexes long as well */
	PList curr, prev, next, *lets = NULL;
	long tlet[NR_LETTERS];	/* nr of slots already handed out in each bucket */
	struct {
		long nr;	/* nr of nodes in this bucket */
		long start;	/* first slot of this bucket within lets[] */
	} letters[NR_LETTERS];

	print_phrases(master->msgs, dedupe_phrases[0], NULL);
	fflush(master->msgs);	/* so msg gets printed */

	TimerFunc(TIMER_START, 0L, NULL);

	/* pass one */
	/* count the nodes per bucket, so we can size the index and lay out the buckets */
	for(i = 0; i < NR_LETTERS; i++) {
		letters[i].nr = letters[i].start = tlet[i] = 0;
	}
	for(curr = master->head; curr != NULL; curr = curr->next) {
		/* go thru unsigned char first: where plain char is signed, a byte with the high */
		/* bit set is negative, % keeps that sign, and we'd index in front of the arrays. */
		/* the % itself is in case a wacko article id slips by */
		j = ((unsigned char) curr->msgnr[LETTER_TO_CHECK]) % NR_LETTERS;
		letters[j].nr++;
		total++;
	}
	/* each bucket starts where the previous one ends; bucket 0 starts on zero so skip it */
	for(i = 1; i < NR_LETTERS; i++) {
		letters[i].start = letters[i-1].start + letters[i-1].nr;
	}

	/* an empty list has nothing to dedupe, and calloc() of zero items may return NULL, */
	/* which must not be reported as running out of memory */
	if(total > 0) {
		/* pass two */
		/* size the index by the nodes actually counted, not by master->nritems, */
		/* so a stale item count can never make us write past the end of it */
		if((lets = calloc((size_t) total, sizeof *lets)) == NULL) {
			error_log(ERRLOG_REPORT, dedupe_phrases[1], NULL);
		}
		else {
			/* drop every node into the next free slot of its bucket */
			for(curr = master->head; curr != NULL; curr = curr->next) {
				j = ((unsigned char) curr->msgnr[LETTER_TO_CHECK]) % NR_LETTERS;
				x = letters[j].start + tlet[j];
				tlet[j]++;	/* so go to next slot */
				lets[x] = curr;
			}

			/* pass three */
			/* work our way down the linked list, checking each node against its own bucket only */
			for(curr = master->head; curr != NULL; curr = curr->next) {
				j = ((unsigned char) curr->msgnr[LETTER_TO_CHECK]) % NR_LETTERS;
				for(i = 0; i < letters[j].nr; i++) {
					x = letters[j].start + i;
					/* need the curr != lets[x] so that we don't delete ourself */
					if(lets[x] != NULL && curr != lets[x] && cmp_msgid(curr->msgnr, lets[x]->msgnr) == TRUE) {
						nrfound++;
						if(master->debug == TRUE) {
							do_debug("Matched %s, nuking\n", curr->msgnr);
						}
						/* now flag it for deletion */
						lets[x]->msgnr[0] = '\0';	/* no more article nr */
						lets[x] = NULL;			/* so don't check it again */
					}
				}
			}

			/* pass four */
			/* now go thru and unlink + free everything that got flagged */
			prev = NULL;
			curr = master->head;
			while(curr != NULL) {
				next = curr->next;	/* grab it before the node can be freed */
				if(curr->msgnr[0] == '\0') {
					/* nuke it */
					master->nritems--;
					if(prev == NULL) {
						/* removing the head node */
						master->head = next;
					}
					else {
						prev->next = next;
					}
					free_one_node(curr);
				}
				else {
					prev = curr;
				}
				curr = next;
			}
			free(lets);
		}
	}

	TimerFunc(TIMER_TIMEONLY, 0L, master->msgs);

	print_phrases(master->msgs, dedupe_phrases[2], str_int(master->nritems),  str_int(nrfound), NULL);
}