C++编程之利用关联容器map统计Apache访问日志

C++语言之关联容器map提供大小可变的容器,基于关联键值高效检索元素值。当你处理键值对的数据时,都可以考虑使用map关联容器。譬如统计日志中的IP信息,用IP字符串做键,访问次数作为值。

一、关联容器map特点
大小可变的关联容器,基于关联键值高效检索元素值。
可逆,因为它提供双向迭代器来访问其元素。
有序,因为它的元素根据指定的比较函数按键值排序。
唯一。 因为它的每个元素必须具有唯一键。
对关联容器(pair associative container),因为它的元素数据值与其键值不同。
模板类,因为它提供的功能是一般性的功能,与元素或键类型无关。元素和键的数据类型,连同比较函数和分配器,都作为类模板的参数来指定。

二、实现代码

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <map>
#include <fstream>
#include <sstream>

using namespace std;

// One ranked entry: a counted string together with how often it was seen.
// Used as the element type of the vectors that get sorted and printed.
struct node
{
	std::string data;   // the counted text (copied from a map key)
	int count;          // number of occurrences (copied from the map value)
};

// Split `str` on the delimiter character `c` and append every piece to `vec`.
//
// `vec` is not cleared first: pieces are added after whatever it already
// holds. Pieces between two adjacent delimiters are appended as empty
// strings; an empty `str` appends nothing.
void split(const std::string &str, std::vector<std::string> &vec, const char &c)
{
    std::stringstream stream(str);
    // getline() stops returning true once the stream is exhausted.
    for (std::string piece; std::getline(stream, piece, c); )
        vec.push_back(piece);
}

// 对vector排序,然后打印结果
void sort_and_print(vector<node> &vec)
{
	sort(vec.begin(), vec.end(), 
		[](const node &a, const node &b){return a.count > b.count;});

	for(vector<node>::size_type i=0; i<10 && i<vec.size(); i++)
	{
		cout << vec[i].count << ":\t" << vec[i].data << endl;
	}
}

// Count one occurrence of `str` in `map`.
//
// map::operator[] inserts the key with a value-initialized (zero) counter
// when it is not present yet, so a first occurrence ends up with count 1.
// `str` is taken by const reference to avoid copying the string on every
// call; the key is only copied when it is actually inserted.
void add_item(std::map<std::string, std::size_t> &map, const std::string &str)
{
	++map[str];
}

// 关联容器数据导入vector中,方便下一步排序
void print_map_to_vector(map<string, size_t> &m, vector<node> &vec)
{
	node *p = new node();
	for(const auto &item : m)
	{
		p->data = item.first;
		p->count = item.second;
		vec.push_back(*p);
	}
	delete p;
}

// Read the access log line by line, count occurrences of client IP,
// requested URL, referer and user agent, and print the most frequent
// entries of each category.
//
// Returns 0 on success, 1 if the log file cannot be opened.
int main(void)
{
	const char *log_path = "access_2018-03-29.log";

	std::ifstream fin(log_path);
	if (!fin)
	{
		// Without this check a missing file would silently produce empty statistics.
		std::cerr << "cannot open log file: " << log_path << std::endl;
		return 1;
	}

	std::map<std::string, std::size_t> ip_map, url_map, referer_map, user_agent_map;
	std::vector<std::string> vec;
	std::string line;
	std::size_t skipped = 0;

	// Process the log one line at a time.
	while (std::getline(fin, line))
	{
		// Split on double quotes. The code below reads fields 0, 1, 3 and 5:
		//   [0] text before the first quote (starts with the client IP)
		//   [1] request line, "<method> <url> <protocol>"
		//   [3] referer
		//   [5] user agent
		// NOTE(review): this layout presumably matches Apache's "combined"
		// log format -- confirm against the server configuration.
		vec.clear();
		split(line, vec, '\"');

		// Blank or malformed lines do not have all six fields; indexing them
		// would be undefined behaviour, so skip them.
		if (vec.size() < 6)
		{
			++skipped;
			continue;
		}

		// Client IP: everything up to the first space of field 0.
		const std::string::size_type ip_end = vec[0].find(' ');
		add_item(ip_map, vec[0].substr(0, ip_end));

		// URL: the token between the first and second space of the request
		// line. If there is no space at all, npos + 1 wraps to 0 and the
		// whole field is taken.
		const std::string::size_type url_begin = vec[1].find(' ') + 1;
		const std::string::size_type url_end = vec[1].find(' ', url_begin);
		add_item(url_map, vec[1].substr(url_begin, url_end - url_begin));

		// Referer.
		add_item(referer_map, vec[3]);

		// User agent.
		add_item(user_agent_map, vec[5]);
	}
	fin.close();

	if (skipped != 0)
	{
		std::cerr << "skipped " << skipped << " malformed line(s)" << std::endl;
	}

	// Copy the counters into vectors so they can be sorted by count.
	std::vector<node> ip_vec, url_vec, referer_vec, user_agent_vec;
	print_map_to_vector(ip_map, ip_vec);
	print_map_to_vector(url_map, url_vec);
	print_map_to_vector(referer_map, referer_vec);
	print_map_to_vector(user_agent_map, user_agent_vec);

	// Print the results.
	std::cout << "访问IP统计:" << std::endl;
	sort_and_print(ip_vec);

	std::cout << "访问页面统计:" << std::endl;
	sort_and_print(url_vec);

	std::cout << "来路统计:" << std::endl;
	sort_and_print(referer_vec);

	std::cout << "浏览器统计:" << std::endl;
	sort_and_print(user_agent_vec);

	return 0;
}

三、运行结果

[ycxie@fedora Workspace]$ g++ stat_map.cpp -o stat_map -Wall
[ycxie@fedora Workspace]$ ./stat_map
访问IP统计:
39580:	116.196.82.133
28216:	123.11.117.71
22028:	106.14.242.113
21915:	112.67.103.4
13640:	47.98.60.66
7929:	222.84.60.132
7341:	61.231.167.236
6326:	106.120.173.135
6281:	117.86.200.253
5804:	1.27.91.109
访问页面统计:
70630:	/
39580:	/thread-7247526-1-1.html
27991:	/thread-7247526-1-1.html
26750:	/forum-6-1.html
26241:	/forum.php
23534:	/thread-7217850-1-1.html
23455:	/thread-7204090-1-1.html
23305:	/static/js/forum_viewthread.js
18918:	/thread-7223719-1-1.html
18365:	/
来路统计:
507630:	http://www.xieyincai.com/thread-7217850-1-1.html
307575:	-
96905:	http://www.xieyincai.com/
47408:	http://www.xieyincai.com/data/cache/style_1_common.css
39687:	http://www.xieyincai.com/thread-7223996-1-1.html
38359:	http://www.xieyincai.com/forum.php
13182:	http://www.xieyincai.com/data/cache/style_1_forum_viewthread.css
11718:	http://www.xieyincai.com/member.php?mod=register
9134:	http://www.xieyincai.com/forum-31-1.html
7058:	http://www.xieyincai.com/forum-20-1.html
浏览器统计:
47130:	Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36
39580:	Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0
38635:	Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0
33372:	Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko
30989:	Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36
30160:	Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)
28200:	Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; LBBROWSER)
27272:	Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)
24261:	Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36
21754:	Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36

Leave a Reply

Your email address will not be published. Required fields are marked *