граббер для devki.ws =))

Discussion in 'С/С++, C#, Rust, Swift, Go, Java, Perl, Ruby' started by sn0w, 3 Oct 2009.

  1. sn0w

    sn0w Статус пользователя:

    Joined:
    26 Jul 2005
    Messages:
    1,021
    Likes Received:
    1,200
    Reputations:
    327
    как сказать то...)) ценителям женской красоты посвящается)

    скачивает картинки) сразу говорю - НЕ ПОРНОГРАФИЯ
    фотки хорошего качества, так что не особо быстро льются.

    exe тут - http://www.rapidshare.ru/1196291

    Code:
    /*
    *	devki.ws image grabber by sn0w (c) 2009, for education purposes only =)
    *				antichat.ru
    *		getitshot v 1.0
    */
    
    
    #include <stdio.h>
    #include <conio.h>
    #include <windows.h>
    #include <wininet.h>
    #pragma comment(lib,"wininet")
    
    
    // When TRUE, process_single_entry() creates a numbered sub-directory per
    // gallery and saves that gallery's images inside it; when FALSE all images
    // go to the current directory. Set in main() from the user's keypress.
    BOOL g_bCreateFolders;
    //////////////////////////////////////////////////////////////////////////
    void* halloc(size_t size)
    {
    	return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
    }
    
    void* hrealloc(void* oldmem, size_t size)
    {
    	return HeapReAlloc(GetProcessHeap(), 0, oldmem, size);
    }
    
    size_t hsize(void *mem)
    {
    	return HeapSize(GetProcessHeap(),0, mem);
    }
    
    void hfree(void* mem)
    {
    	HeapFree(GetProcessHeap(),0,mem);
    }
    
    /////////////////////////////////////////////////////////////////////////////
    // Performs an HTTP GET of `request` on `host`:80 through WinINet and
    // returns the response body in a heap buffer.
    //
    //   host            server name, e.g. "devki.ws"
    //   request         object to fetch, passed to HttpOpenRequest as-is
    //   retlen          receives the number of body bytes read (0 on failure)
    //   KeepConnection  non-zero adds INTERNET_FLAG_KEEP_CONNECTION
    //
    // Returns a buffer the caller must release with hfree(). The buffer is
    // always NUL-terminated at offset *retlen so it can be searched with
    // strstr(). If any WinINet call fails, an empty buffer with *retlen == 0 is
    // returned; NULL is returned only when the initial allocation fails.
    LPVOID Inet_GET(IN LPSTR host, IN LPSTR request, OUT LPDWORD retlen, IN BOOL KeepConnection)
    {
    	LPCSTR szAccept[] = {"*/*", NULL};
    	LPCSTR szUserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; ru; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3";
    	const DWORD chunk = 4096;

    	HINTERNET hInternet = NULL;
    	HINTERNET hConnect = NULL;
    	HINTERNET hRequest = NULL;
    	DWORD flags = INTERNET_FLAG_RELOAD | INTERNET_FLAG_PRAGMA_NOCACHE;
    	DWORD size = 0, curr = 0;
    	char *outbuff;

    	*retlen = 0;

    	// one extra byte so there is always room for the terminating NUL
    	outbuff = (char*)halloc(chunk + 1);
    	if(!outbuff) return NULL;

    	if(KeepConnection) flags |= INTERNET_FLAG_KEEP_CONNECTION;

    	hInternet = InternetOpen(szUserAgent, INTERNET_OPEN_TYPE_DIRECT, NULL, NULL, 0);
    	if(!hInternet) goto cleanup;

    	hConnect = InternetConnect(hInternet, host, 80, NULL, NULL, INTERNET_SERVICE_HTTP, 0, 0);
    	if(!hConnect) goto cleanup;

    	hRequest = HttpOpenRequest(hConnect, "GET", request, NULL, NULL, szAccept, flags, 0);
    	if(!hRequest) goto cleanup;

    	if(!HttpSendRequest(hRequest, NULL, 0, NULL, 0)) goto cleanup;

    	// read in 4K chunks, always keeping a free chunk (+1) behind the data;
    	// plain pointer arithmetic avoids truncating the address on 64-bit
    	while(InternetReadFile(hRequest, outbuff + size, chunk, &curr) && curr != 0){
    		char *grown;

    		size += curr;
    		grown = (char*)hrealloc(outbuff, size + chunk + 1);
    		if(!grown) break;	// out of memory: keep what was read so far
    		outbuff = grown;
    	}

    cleanup:
    	if(hRequest) InternetCloseHandle(hRequest);
    	if(hConnect) InternetCloseHandle(hConnect);
    	if(hInternet) InternetCloseHandle(hInternet);

    	// capacity is always at least size + 1, see the allocation sizes above
    	outbuff[size] = 0;

    	*retlen = size;
    	return outbuff;
    }
    //////////////////////////////////////////////////////////////////////////
    // Downloads `url` from the image host pix01.devki.ws via Inet_GET() without
    // the keep-connection flag. `len` receives the byte count; the returned
    // buffer comes straight from Inet_GET().
    LPVOID Get_Image(char *url, DWORD *len)
    {
    	LPVOID image_data;

    	image_data = Inet_GET("pix01.devki.ws", url, len, FALSE);
    	return image_data;
    }
    
    //////////////////////////////////////////////////////////////////////////
    // Scans a NUL-terminated HTML page for quoted links that end in
    // `_slideshow.html"` and appends each one (without the quotes, one per
    // line) to pagelinks.txt in the current directory.
    //
    //   page  NUL-terminated page text; NULL is ignored
    //   len   page length; nothing is done when it is zero or negative
    //
    // A link equal to the one written immediately before it is not repeated.
    // A match with no opening quote before it, or one too long for the
    // 256-byte link buffer, is skipped instead of being copied.
    void parse_page(char *page, int len)
    {
    	// string to find:    "XXXXXXXXXXXXXXXXX_slideshow.html">
    	static const char marker[] = "_slideshow.html\"";
    	const size_t suffix_len = sizeof(marker) - 2;	// marker minus quote and NUL

    	char fulllink[256], lastlink[256];
    	const char *hit;
    	FILE *pf;

    	if(!page || len <= 0) return;

    	hit = strstr(page, marker);
    	if(!hit) return;

    	pf = fopen("pagelinks.txt", "a+");
    	if(!pf) return;

    	lastlink[0] = 0;

    	do{
    		const char *start = hit;
    		size_t linklen;

    		// walk back to the opening quote, never before the page start
    		while(start > page && start[-1] != '\"') start--;

    		linklen = (size_t)(hit - start) + suffix_len;

    		// start == page means no opening quote was found for this match
    		if(start > page && linklen < sizeof(fulllink)){
    			memcpy(fulllink, start, linklen);
    			fulllink[linklen] = 0;

    			if(strcmp(lastlink, fulllink) != 0)
    				fprintf(pf, "%s\n", fulllink);

    			strcpy(lastlink, fulllink);
    		}

    		// resume at the closing quote of the link just handled
    		hit = strstr(hit + suffix_len, marker);

    	}while(hit);

    	fclose(pf);
    }
    
    //////////////////////////////////////////////////////////////////////////
    
    // Replaces, in place, every non-overlapping occurrence of `substr` in `str`
    // with `with`, scanning left to right; inserted text is not rescanned.
    //
    //   str = aaaaabbbbbccccc, substr = aabb, with = XYX  ->  aaaXYXbbbccccc
    //
    // When `with` is longer than `substr` the string grows, so the caller must
    // make sure the buffer behind `str` is large enough for the result.
    // NULL arguments and an empty `substr` leave `str` untouched.
    void string_replace(char *str, char *substr, char *with)
    {
    	size_t sub_len, with_len;
    	char *hit;

    	if(!str || !substr || !with) return;

    	sub_len = strlen(substr);
    	if(sub_len == 0) return;	// an empty pattern would match forever
    	with_len = strlen(with);

    	hit = strstr(str, substr);
    	while(hit){
    		if(with_len != sub_len){
    			// shift the tail (including its NUL) to fit the replacement
    			size_t tail_len = strlen(hit + sub_len) + 1;
    			memmove(hit + with_len, hit + sub_len, tail_len);
    		}
    		memcpy(hit, with, with_len);

    		hit = strstr(hit + with_len, substr);
    	}
    }
    
    
    //////////////////////////////////////////////////////////////////////////
    void process_single_entry(char *s_entry)
    {
    	// terminate 0d/0a with 0
    	char *p = s_entry;
    	while(*p++) if(*p=='\x0d' || *p=='\x0a') *p = 0;
    
    	// now download it
    	LPVOID	page;
    	DWORD	page_size;
    	char	req_tmpl[] = "/%s";
    	char	req[256];
    	int		url_length;
    	char	file_path[256], directory[256];
    	
    	static int nNumberImage = 0;
    	static int nNumberGallery = 0;
    
    	nNumberGallery++;
    
    	printf("downloading is in progress, gallery #%d (ETA unavailable)...\n", nNumberGallery);
    	
    	sprintf_s(req, req_tmpl, s_entry);
    	page = Inet_GET("devki.ws", req, &page_size, 0);
    
    	
    	if(g_bCreateFolders){
    		GetCurrentDirectory(sizeof(file_path), file_path);
    		sprintf_s(directory, "%s\\%.5d", file_path, nNumberGallery);
    		CreateDirectory(directory, 0);
    	}
    
    	// check the page
    	// <p><div class="sandbox"><a href="brunettes_g6305_slideshow.html">
    	// <script language="JavaScript">
    	// document.write(decodeURIComponent("%3Cimg%20src%3D%22http%3A%2F%2Fpix01.devki.ws%2F560b1152%2F6302%2F137.jpg%22%20border%3D%220%22%20alt%3D%22Evelyn%20Lory%2C%D0%BF%D0%BE%D1%80%D0%BD%D0%BE%20%D0%B2%D0%B8%D0%B4%D0%B5%D0%BE%20%D1%81%D0%BE%20%D0%B7%D0%B2%D0%B5%D0%B7%D0%B4%D0%B0%D0%BC%D0%B8%22%20%2F%3E"));
    	// </script></a></div></p>
    	while(TRUE){
    
    		char *sandbox_str, *pnext_page_url;
    		char next_page_url[256];
    
    		sandbox_str = strstr((char*)page, "sandbox\"><a href=\"");
    		memset(next_page_url,0, sizeof(next_page_url));
    
    		// no sandbox tag, abort operation
    		if(!sandbox_str){
    			hfree(page);
    			nNumberImage = 0;
    			return;
    		}
    
    		pnext_page_url = sandbox_str + 18;
    
    		// fillup next_page_url
    		char *pnpage = next_page_url;
    		while(*pnext_page_url!='\"')
    			*pnpage++=*pnext_page_url++;
    
    		// the next url is from the different gallery, abort operation
    		if(strncmp(s_entry, next_page_url , strlen(s_entry)-20)){
    			hfree(page);
    			nNumberImage = 0;
    			return;
    		}
    		
    		char *image_url = strstr((char*)page, "pix01.devki.ws");
    
    		if(!image_url){
    			hfree(page);
    			nNumberImage = 0;
    			return;
    		}
    
    		image_url+=14;
    
    		char *url_end = strstr(image_url, ".jpg");
    		char current_image_url[256];
    
    		url_end += 4;
    		
    		memset(current_image_url, 0, sizeof(current_image_url));
    		strncpy(current_image_url, image_url, url_end - image_url);
    	
    		string_replace(current_image_url, "%2F", "/");
    		hfree(page);
    
    		// got it.
    		// next page location: next_page_url
    		// current image link: current_image_url
    		
    		nNumberImage++;
    
    		DWORD jpeglen;
    		LPVOID jpegdata;
    		HANDLE hFile;
    		char file_name[256];
    		
    		if(g_bCreateFolders){
    			sprintf_s(file_name, "%s\\%.8d_%.4d.jpg", directory, nNumberGallery, nNumberImage);
    		}else{
    			sprintf_s(file_name, "%.8d_%.4d.jpg", nNumberGallery, nNumberImage);
    		}
    
    		printf("\tdownloading image #%d...\r", nNumberImage);
    		jpegdata = Get_Image(current_image_url, &jpeglen);
    
    		hFile = CreateFile(file_name, GENERIC_WRITE,FILE_SHARE_WRITE,0,CREATE_ALWAYS,0,0);
    		WriteFile(hFile, jpegdata, jpeglen, &jpeglen, 0);
    		CloseHandle(hFile);
    		hfree(jpegdata);
    
    		page = Inet_GET("devki.ws", next_page_url, &page_size, 0);
    	}
    	
    	// this place is unreachable %)
    
    }
    
    
    //////////////////////////////////////////////////////////////////////////
    // Program entry point.
    //   1. Fetches the devki.ws front page once to verify the connection.
    //   2. Asks whether each gallery should get its own directory.
    //   3. Fetches listing pages 1-25 and collects slideshow links into
    //      pagelinks.txt (the file is deleted first).
    //   4. Reads pagelinks.txt line by line and downloads each gallery.
    // Waits for a key press before returning; always returns 0.
    int main(int argc, char **argv)
    {
    	DWORD len;
    	LPVOID buff;
    	FILE *pf;
    	char retch;
    	int i;

    	char s_page_tmpl[] ="/engine.php?mod=galleries&act=list&category=53&page=%d";
    	char s_page[256];
    	char page_str[256];

    	printf("            -= devki.ws grabber by sn0w ;) =-\n\n");

    	// 1. Accept cookies etc
    	printf("logging on devki.ws:80...\n");
    	buff = Inet_GET("devki.ws", "/", &len,0);
    	if(buff) hfree(buff);
    	if(!len){
    		printf("connection failed. press any key to exit\n");
    		goto end_prog;
    	}

    	printf("press Y if you want to create directory for each gallery\n");
    	retch = (char)_getch();
    	g_bCreateFolders = (retch == 'Y' || retch == 'y') ? TRUE : FALSE;

    	DeleteFile("pagelinks.txt");

    	// 2. Get page 1-25 and parse links for the slide shows (its a better way to view them step by step)
    	// to file pagelinks.txt
    	for(i=1; i<=25; i++){
    		printf("\tparsing page %d...\r",i);
    		sprintf_s(s_page, sizeof(s_page), s_page_tmpl, i);
    		buff = Inet_GET("devki.ws", s_page, &len,0);
    		if(buff){
    			parse_page((char*)buff, (int)len);
    			hfree(buff);
    		}
    	}

    	printf("\n");

    	// 3. Now start deep parsing for the each gallery
    	pf = fopen("pagelinks.txt", "r");
    	if(!pf){
    		// the file only exists if at least one listing page was parsed
    		printf("no gallery links found. press any key to exit\n");
    		goto end_prog;
    	}

    	// loop on fgets itself: testing feof() first would process the
    	// last line a second time once the read fails
    	while(fgets(page_str, sizeof(page_str), pf)){
    		if(page_str[0]=='\x0d' || page_str[0]=='\x0a') continue;	// blank line
    		process_single_entry(page_str);
    	}
    	fclose(pf);

    	// All done.
    	printf("\nok.");
    end_prog:
    	_getch();

    	return 0;
    }
    
    
    а блин баг нашел - последнюю фотку из галереи каждой не заливает.)
     
    #1 sn0w, 3 Oct 2009
    Last edited: 3 Oct 2009
    5 people like this.
  2. superboy4

    superboy4 Banned

    Joined:
    17 Jul 2007
    Messages:
    151
    Likes Received:
    19
    Reputations:
    -8
    спасибо, хвалю за знания в си
     
  3. Ins3t

    Ins3t Харьковчанин

    Joined:
    18 Jul 2009
    Messages:
    939
    Likes Received:
    429
    Reputations:
    139
    Снег, ты бы лучше порнокачалку подновил, а то народ жалуется на неработоспособность :D
     
  4. sn0w

    sn0w Статус пользователя:

    Joined:
    26 Jul 2005
    Messages:
    1,021
    Likes Received:
    1,200
    Reputations:
    327
    так аналог) тока более эстетичный)))))
     
    1 person likes this.
  5. Dark_Scorpicore

    Joined:
    4 Apr 2009
    Messages:
    53
    Likes Received:
    8
    Reputations:
    0
    Благодарю за хороший пример :)
    Очень помогло (не в скачивании прона, а в изучении си)
    xD
     
  6. АлексDevil

    Joined:
    5 Apr 2010
    Messages:
    10
    Likes Received:
    3
    Reputations:
    0
    сайт больше не работает
     
  7. АлексDevil

    Joined:
    5 Apr 2010
    Messages:
    10
    Likes Received:
    3
    Reputations:
    0
    кто знает, откуда они брали сеты?