// matome.cs
using System;
using System.Data;
using System.IO;
using System.Text;
using System.Collections;
using System.Collections.Generic;
using System.Configuration;
using System.Text.RegularExpressions;
namespace matome {
class Matome {
// Regex source applied to post text to find anchors; assigned in Main from the ANCHOR_PATTERN app setting.
private static string ANCHOR_PATTERN = string.Empty;
// Regex source applied to each log line to split it into fields; assigned in Main from the RES_PATTERN app setting.
private static string RES_PATTERN = string.Empty;
// Running sequence number that OutHtml appends to numbered output file names.
private static int logCount = 1;
// When true, MakeHtml emits its leading and trailing fragments around the posts (TAG_MODE app setting).
private static bool TagMode = true;
// Consulted by OutHtml when choosing the output file name (APPEND_MODE app setting).
private static bool AppendMode = true;
// Program entry point.
// Takes the target name from the first command line argument, or from the BRAVE_NAME app
// setting when no argument is given, loads the remaining app settings into static fields and
// into a Brave instance, then calls getLogByFolder for every directory found under FOLDER_PATH
// (or for FOLDER_PATH itself when it has no sub directories).
// Returns 1 when no name is available or when FOLDER_PATH does not exist.
// NOTE(review): the end of this method is not present in this copy of the file; see the note
// placed after the last visible statement of Main.
public static int Main(string[] args) {
Boolean ret = true;
Brave brave = new Brave();
// No argument: fall back to the configured name.
// NOTE(review): Trim() runs before the null test, so a missing BRAVE_NAME setting would fail
// inside Trim() instead of reaching the usage message - confirm.
if (args.Length < 1) {
string braveName = System.Configuration.ConfigurationSettings.AppSettings["BRAVE_NAME"].Trim();
if (braveName==null || braveName.Equals("")) {
Console.WriteLine("Usage : Matome 勇者名 ");
return 1;
}
brave.Name = braveName;
} else {
brave.Name = args[0];
}
// Regex sources and mode switches; a switch is on only when its setting is exactly TRUE.
ANCHOR_PATTERN=System.Configuration.ConfigurationSettings.AppSettings["ANCHOR_PATTERN"];
RES_PATTERN = System.Configuration.ConfigurationSettings.AppSettings["RES_PATTERN"];
TagMode = System.Configuration.ConfigurationSettings.AppSettings["TAG_MODE"].Equals("TRUE");
AppendMode = System.Configuration.ConfigurationSettings.AppSettings["APPEND_MODE"].Equals("TRUE");
// The four lists are read from indexed settings (PREFIX_0, PREFIX_1, ...) with the number of
// entries given by the matching *_COUNT setting, which is re-parsed on every loop test.
for (int i = 0; i < int.Parse(ConfigurationSettings.AppSettings["BRAVE_TRIP_COUNT"]); i++) {
brave.Trips.Add(ConfigurationSettings.AppSettings["BRAVE_TRIP_" + i]);
}
for (int i = 0; i < int.Parse(ConfigurationSettings.AppSettings["BRAVE_ID_COUNT"]); i++) {
brave.IDs.Add(ConfigurationSettings.AppSettings["BRAVE_ID_" + i]);
}
for (int i = 0; i < int.Parse(ConfigurationSettings.AppSettings["NOT_BRAVE_TRIP_COUNT"]); i++) {
brave.DummyTrips.Add(ConfigurationSettings.AppSettings["NOT_BRAVE_TRIP_" + i]);
}
for (int i = 0; i < int.Parse(ConfigurationSettings.AppSettings["NOT_BRAVE_ID_COUNT"]); i++) {
brave.DummyIDs.Add(ConfigurationSettings.AppSettings["NOT_BRAVE_ID_" + i]);
}
string path = ConfigurationSettings.AppSettings["FOLDER_PATH"];
string targetPath = path;
string[] workDirectorys;
Boolean subFolderSearchMode = System.Configuration.ConfigurationSettings.AppSettings["FOLDER_SEARCH_MODE"].Equals("TRUE");
if(!Directory.Exists(targetPath)){
Console.WriteLine("指定されたパスが見つかりません :{0}",targetPath);
return 1;
}
// FOLDER_SEARCH_MODE selects between a recursive listing and direct children only.
if (subFolderSearchMode) {
workDirectorys = Directory.GetDirectories(targetPath, "*", SearchOption.AllDirectories);
} else {
workDirectorys = Directory.GetDirectories(targetPath);
}
Boolean idMode = System.Configuration.ConfigurationSettings.AppSettings["ID_MODE"].Equals("ALL");
Boolean logMode = System.Configuration.ConfigurationSettings.AppSettings["LOG_MODE"].Equals("TRUE");
string nameMode = System.Configuration.ConfigurationSettings.AppSettings["NAME_MODE"];
// getLogByFolder is not visible in this copy; its third argument is passed as true for the
// first call per directory and false for the second.
// NOTE(review): ret is never reassigned in the visible part of Main, so the ret test in the
// loop conditions below is always true as far as this copy shows.
if (workDirectorys.Length > 0) {
// Per directory: the true call is skipped when LOG_MODE is TRUE; the false call follows
// immediately unless ID_MODE is ALL.
for (int i = 0; ret && i < workDirectorys.Length; i++) {
if (!logMode) {
getLogByFolder(workDirectorys[i], brave, true, nameMode);
}
if (!idMode) {
getLogByFolder(workDirectorys[i], brave, false, nameMode);
}
}
// ID_MODE ALL: the false calls run only after every directory has had its first call.
if (idMode) {
for (int i = 0; ret && i < workDirectorys.Length; i++) {
getLogByFolder(workDirectorys[i], brave, false, nameMode);
}
}
// No sub directories: process FOLDER_PATH itself.
} else { if (!logMode) {
getLogByFolder(targetPath, brave, true, nameMode);
}
getLogByFolder(targetPath, brave, false, nameMode);
}
// NOTE(review): text is missing from this copy of the file at this point. The remainder of
// Main and the beginning of at least one other method are not visible. Everything from the
// next line down to the closing brace after the return statement is the tail of a method that
// uses name, matchCol, brave, nameMode and ret and returns ret; its signature and the start of
// its body cannot be seen here.
, "").Trim();
string trip = "";
// Split the trip part off the name at the marker character tested below.
if (name.IndexOf("◆") >= 0) {
trip = name;
trip = trip.Substring(trip.IndexOf("◆") + 1, trip.Length - trip.IndexOf("◆") - 1);
trip = trip.Trim();
name = name.Substring(0, name.IndexOf("◆")).Trim();
}
// The post is accepted when its trip is already listed in brave.Trips, or when its name
// matches brave.Name in the way selected by nameMode (START, IN, ALL or END).
if (trip!=null && !trip.Equals("") && brave.Trips.Contains(trip)
|| (nameMode.Equals("START") && name.StartsWith(brave.Name))
|| (nameMode.Equals("IN") && name.IndexOf(brave.Name)>=0)
|| (nameMode.Equals("ALL") && name.Equals(brave.Name))
|| (nameMode.Equals("END") && name.EndsWith(brave.Name))
) {
// Group 3 of the match is treated as date text followed by an ID: token and the ID.
// NOTE(review): when the ID: token is absent IndexOf returns -1 and the Substring call
// starts at offset 2 - confirm that input always carries the token.
string dateId = matchCol[0].Groups[3].Value.Trim();
string id = dateId.Substring(dateId.IndexOf("ID:") + 3).Trim();
// Trips and IDs on the exclusion lists are never learned.
if (brave.DummyTrips.Contains(trip)) {
continue;
}
if (brave.DummyIDs.Contains(id)) {
continue;
}
// Remember the trip and ID of the accepted post for later matching.
// NOTE(review): trip may still be the empty string here when the post matched by name only.
if (!brave.Trips.Contains(trip)) {
brave.Trips.Add(trip);
}
if (!brave.IDs.Contains(id)) {
brave.IDs.Add(id);
}
}
}
}
}
// Any exception is reported by stack trace only and turns the result into false.
} catch (Exception e) {
Console.WriteLine("Exception StackTrace = {0}", e.StackTrace);
ret = false;
} finally {
}
return ret;
}
// Reads one log file and builds a Sure from it.
// Each non-empty line is matched against RES_PATTERN and turned into a Res; the posts are then
// flagged in two passes: first by poster (name, trip, ID), then by following anchors and arrow
// characters in the text of flagged posts.
// pInfo: the log file to read. brave: the target name plus accepted and excluded trips and IDs.
// nameMode: START, IN, ALL or END, selecting how a post name is compared with brave.Name.
// Returns the populated Sure; its flag field is 1 when at least one post was flagged.
// Any exception raised inside is logged and rethrown wrapped in a new Exception.
private static Sure executeForTripID(FileInfo pInfo, Brave brave, string nameMode) {
Sure wSure = new Sure();
try {
string sure = ReadFile(pInfo);
string[] result = sure.Split('\n');
for (int i = 0; i < result.Length ; i++) {
if (result[i].Trim() != "") {
Regex regex = new System.Text.RegularExpressions.Regex(RES_PATTERN);
MatchCollection matchCol = regex.Matches(result[i].Trim());
if (matchCol.Count > 0) {
// Only the first line supplies the title (group 5).
if (i == 0) {
wSure.Title = matchCol[0].Groups[5].Value.Trim();
}
Res wRes = new Res();
// The post number is the line index plus one, so lines that are blank or do not match
// leave gaps between post numbers and positions in wSure.Res.
wRes.No = i + 1;
wRes.Name = matchCol[0].Groups[1].Value.Trim();
// NOTE(review): both Replace calls below search for an empty string, which
// String.Replace rejects with an ArgumentException. The search texts look like they
// were lost from this copy of the file - confirm against the original source.
wRes.Name = wRes.Name.Replace("", "").Replace("", "");
// Split the trip part off the name at the marker character.
if (wRes.Name.IndexOf("◆") >= 0) {
string trip = wRes.Name;
trip = trip.Substring(trip.IndexOf("◆") + 1, trip.Length - trip.IndexOf("◆") - 1);
wRes.Trip = trip.Trim();
wRes.Name = wRes.Name.Substring(0, wRes.Name.IndexOf("◆")).Trim();
}
// The mail field (group 2) is kept with the words sage and age removed.
wRes.Mail = matchCol[0].Groups[2].Value.Trim().Replace("sage", "").Replace("age", "");
// Group 3 holds the date, optionally followed by an ID: token and the ID.
string dateId = matchCol[0].Groups[3].Value.Trim();
if (dateId.IndexOf("ID:") >= 0) {
wRes.date = dateId.Substring(0, dateId.IndexOf("ID:")).Trim();
wRes.id = dateId.Substring(dateId.IndexOf("ID:") + 3).Trim();
} else {
wRes.date = dateId.Trim();
}
wRes.text = matchCol[0].Groups[4].Value.Trim();
// Text containing 20 consecutive digits is replaced by a fixed notice.
if (new System.Text.RegularExpressions.Regex(@"\d{20}").Matches(wRes.text).Count > 0) {
wRes.text = "tikage と思われるので削除されました。";
}
// Posts under this exact name are replaced by a fixed notice as well.
if (wRes.Name.Equals("milky")) {
wRes.text = "milky と思われるので削除されました。";
}
// NOTE(review): the pattern below has a closing parenthesis without an opening one, which
// the Regex parser would reject. Part of the pattern looks like it was lost from this copy
// of the file - confirm against the original source.
wRes.text = Regex.Replace(wRes.text, "\\(.*?)", "$1");
wSure.Res.Add(wRes);
}
}
}
Boolean enforcedMode = System.Configuration.ConfigurationSettings.AppSettings["ENFORCED_MODE"].Equals("TRUE");
Boolean resMode = System.Configuration.ConfigurationSettings.AppSettings["RES_MODE"].Equals("TRUE");
// Pass 1: flag posts by poster. Excluded trips and IDs are checked first and win over every
// other rule; then name, trip and ID matches give FLAG_RES; finally ENFORCED_MODE gives
// FLAG_OTHER to posts whose name merely contains the target name.
foreach (Res r in wSure.Res) {
if (brave.DummyTrips.Contains(r.Trip)) {
continue;
}
else if (brave.DummyIDs.Contains(r.id)) {
continue;
}
else if ((nameMode.Equals("START") && r.Name.StartsWith(brave.Name))
|| (nameMode.Equals("IN") && r.Name.IndexOf(brave.Name)>=0)
|| (nameMode.Equals("ALL") && r.Name.Equals(brave.Name))
|| (nameMode.Equals("END") && r.Name.EndsWith(brave.Name))
) {
r.flag = Res.FLAG_RES;
}
else if (brave.Trips.Contains(r.Trip)) {
r.flag = Res.FLAG_RES;
}
else if (brave.IDs.Contains(r.id)) {
r.flag = Res.FLAG_RES;
}
else if (enforcedMode && r.Name.IndexOf(brave.Name) >= 0) {
r.flag = Res.FLAG_OTHER;
}
// The first flagged post marks the thread for output and supplies its date.
if (wSure.flag == 0 && r.flag != Res.FLAG_NO_OUTPUT) {
wSure.flag = 1;
wSure.date = r.date;
}
}
// Pass 2 runs only for threads with at least one flagged post. Posts are visited in list
// order, so a flag set on a later post by SetFlag is seen when the loop reaches that post.
if (wSure.flag == 1) {
foreach (Res r in wSure.Res) {
if (r.flag == Res.FLAG_RES || r.flag == Res.FLAG_UNC) {
// Follow the anchors in the text of a FLAG_RES or FLAG_UNC post.
Regex regex = new System.Text.RegularExpressions.Regex(ANCHOR_PATTERN);
MatchCollection matchCol = regex.Matches(r.text);
if (matchCol.Count > 0) {
foreach (Match match in matchCol) {
try {
// Group 2 is the first post number, group 3 an optional separator and group 4 the
// second number: a hyphen makes a range, a comma adds one extra target.
int index = ToNumber(match.Groups[2].Value);
int lastIndex = index;
if (match.Groups.Count > 3) {
string separete = match.Groups[3].Value.Trim();
if (separete.Equals("-")) {
lastIndex = ToNumber(match.Groups[4].Value);
}
if (separete.Equals(",")) {
int wIndex = ToNumber(match.Groups[4].Value);
// NOTE(review): the bounds exclude 1 and also exclude the value equal to
// wSure.Res.Count even though the element read is wIndex - 1 - confirm whether
// the last list entry is meant to be unreachable.
if (wIndex > 1 && wIndex < wSure.Res.Count) {
// A target with a higher post number than the referring post, or any target
// of a FLAG_UNC post, is requested as FLAG_UNC; otherwise FLAG_OTHER.
if (r.No < wSure.Res[wIndex - 1].No || r.flag == Res.FLAG_UNC) {
SetFlag(wSure.Res[wIndex - 1], Res.FLAG_UNC);
} else {
SetFlag(wSure.Res[wIndex - 1], Res.FLAG_OTHER);
}
}
}
}
// Same rule applied to the single target or to every number in the range.
for (int i = index; i <= lastIndex; i++) {
if (i > 1 && i < wSure.Res.Count) {
if (r.No < wSure.Res[i - 1].No || r.flag == Res.FLAG_UNC) {
SetFlag(wSure.Res[i - 1], Res.FLAG_UNC);
} else {
SetFlag(wSure.Res[i - 1], Res.FLAG_OTHER);
}
}
}
// A failure on one anchor is logged and the next anchor is processed.
} catch (Exception e) {
Console.WriteLine("text={0}\nException StackTrace = {1}", r.text, e.StackTrace);
}
}
}
// For FLAG_UNC posts, arrow characters in the text also select neighbours: the up arrow
// selects post number No - 1, the doubled down arrow No + 2 and a single down arrow No + 1.
if (r.flag == Res.FLAG_UNC) {
if (r.text.IndexOf("↑") >= 0) {
int index = r.No - 1;
if (index > 1 && index < wSure.Res.Count) {
SetFlag(wSure.Res[index - 1], Res.FLAG_UNC);
}
}
if (r.text.IndexOf("↓の↓") >= 0) {
int index = r.No + 2;
if (index > 1 && index < wSure.Res.Count) {
SetFlag(wSure.Res[index - 1], Res.FLAG_UNC);
}
} else
if (r.text.IndexOf("↓") >= 0) {
int index = r.No + 1;
if (index > 1 && index < wSure.Res.Count) {
SetFlag(wSure.Res[index - 1], Res.FLAG_UNC);
}
}
}
// RES_MODE: for every other post except post number 1, request FLAG_OTHER on the post itself
// when one of its anchors points at a FLAG_RES post.
} else if (resMode) { if(r.No==1){
continue;
}
Regex regex = new System.Text.RegularExpressions.Regex(ANCHOR_PATTERN);
MatchCollection matchCol = regex.Matches(r.text);
if (matchCol.Count > 0) {
foreach (Match match in matchCol) {
try {
int index = ToNumber(match.Groups[2].Value);
int lastIndex = index;
if (match.Groups.Count > 3) {
string separete = match.Groups[3].Value.Trim();
if (separete.Equals("-")) {
lastIndex = ToNumber(match.Groups[4].Value);
}
if (separete.Equals(",")) {
int wIndex = ToNumber(match.Groups[4].Value);
// The lower bound here is 0, unlike the bound of 1 used in the branch above.
if (wIndex > 0 && wIndex < wSure.Res.Count) {
if (wSure.Res[wIndex - 1].flag==Res.FLAG_RES) {
SetFlag(r, Res.FLAG_OTHER);
}
}
}
}
for (int i = index; i <= lastIndex; i++) {
if (i > 0 && i < wSure.Res.Count) {
if (wSure.Res[i - 1].flag == Res.FLAG_RES) {
SetFlag(r, Res.FLAG_OTHER);
}
}
}
// Unlike the anchor handling for flagged posts, a failure here is rethrown and ends
// up in the outer catch block.
} catch (Exception e) {
Console.WriteLine("text={0}\nException StackTrace = {1}", r.text, e.StackTrace);
throw new Exception("テキスト解析失敗", e);
}
}
}
}
}
}
// Log the stack trace, then wrap and rethrow so the caller sees the failure.
} catch (Exception e) {
Console.WriteLine("Exception StackTrace = {0}", e.StackTrace);
throw new Exception("酉、ID抽出失敗", e);
} finally {
}
return wSure;
}
/// <summary>
/// Converts the text of a post number into an int.
/// Full-width digits (U+FF10 to U+FF19) are folded onto ASCII '0' to '9' before parsing,
/// so numbers typed with a Japanese input method are understood as well.
/// </summary>
/// <param name="oldString">Text to convert; may be null.</param>
/// <returns>
/// The parsed value, or 0 when the text is null, blank, not a number, or out of int range.
/// </returns>
private static int ToNumber(string oldString) {
    if (oldString == null || oldString.Trim().Equals("")) {
        return 0;
    }

    // The previous implementation replaced each ASCII digit with itself, which left
    // full-width digits untouched and made them parse as 0. Map them explicitly instead.
    char[] chars = oldString.ToCharArray();
    for (int i = 0; i < chars.Length; i++) {
        char c = chars[i];
        if (c >= '\uFF10' && c <= '\uFF19') {
            chars[i] = (char)('0' + (c - '\uFF10'));
        }
    }

    // TryParse accepts the same input as int.Parse but reports failure without throwing.
    int value;
    if (int.TryParse(new string(chars), out value)) {
        return value;
    }
    return 0;
}
/// <summary>
/// Applies a flag to a post unless the post already has a stronger classification.
/// Posts already marked FLAG_RES, FLAG_YRES or FLAG_UNC are left untouched.
/// Any other post receives FLAG_YRES when it carries a trip, and the requested flag otherwise.
/// </summary>
/// <param name="r">Post to update.</param>
/// <param name="flag">Flag to assign when the post has no trip.</param>
private static void SetFlag(Res r, int flag) {
    switch (r.flag) {
        case Res.FLAG_RES:
        case Res.FLAG_YRES:
        case Res.FLAG_UNC:
            // Already classified: keep the existing flag.
            return;
    }

    bool hasTrip = !string.IsNullOrEmpty(r.Trip);
    r.flag = hasTrip ? Res.FLAG_YRES : flag;
}
/// <summary>
/// Reads the whole file as text using the encoding registered under the name sjis.
/// </summary>
/// <param name="pInfo">File to read.</param>
/// <returns>
/// The file contents, or an empty string when access is denied or the file is not found
/// (both cases are reported on the console).
/// </returns>
private static string ReadFile(FileInfo pInfo) {
    string contents = "";
    try {
        // The using block closes the reader (and the underlying stream) on every path.
        using (StreamReader reader = new System.IO.StreamReader(pInfo.OpenRead(), System.Text.Encoding.GetEncoding("sjis"))) {
            contents = reader.ReadToEnd();
        }
    } catch (UnauthorizedAccessException denied) {
        Console.WriteLine("Exception StackTrace = {0}", denied.StackTrace);
        Console.WriteLine("ファイル({0})へのアクセスが許可されていません。 ", pInfo.Name);
    } catch (FileNotFoundException missing) {
        Console.WriteLine("Exception StackTrace = {0}", missing.StackTrace);
        Console.WriteLine("ファイル({0})が見つかりませんでした。 ", pInfo.Name);
    }
    return contents;
}
/// <summary>
/// Renders a parsed thread with MakeHtml and appends the result to an .html file.
/// The directory comes from the OUT_PATH setting (falling back to the current directory when
/// it is unset or does not exist). The file name is built from OUT_FILENAME or the target
/// name, combined with AppendMode, OUT_FILELAST_NAME and the running log counter.
/// When DATE_MODE is TRUE the last-write time of the file is set to the thread date.
/// </summary>
/// <param name="pInfo">Source log file; its name (minus the last four characters) can become part of the output name.</param>
/// <param name="brave">Supplies the target name used in the default output name.</param>
/// <param name="wSure">Parsed thread to render.</param>
private static void OutHtml(FileInfo pInfo, Brave brave, Sure wSure) {
    StringBuilder buf = MakeHtml(wSure);
    bool outLastNameMode = System.Configuration.ConfigurationSettings.AppSettings["OUT_FILELAST_NAME"].Equals("TRUE");

    // Thread date: the date recorded on the thread, or the date of the first post otherwise.
    // TryParse leaves outTime at its default value on failure, as the old empty catch did,
    // but also tells us whether the value is usable.
    string dateText = wSure.date;
    if (dateText == null && wSure.Res.Count > 0) {
        dateText = wSure.Res[0].date;
    }
    DateTime outTime;
    bool outTimeParsed = DateTime.TryParse(dateText, out outTime);

    string outPath = ConfigurationSettings.AppSettings["OUT_PATH"];
    if (string.IsNullOrEmpty(outPath) || !Directory.Exists(outPath)) {
        outPath = ".";
    }

    string outName = ConfigurationSettings.AppSettings["OUT_FILENAME"];
    if (string.IsNullOrEmpty(outName)) {
        // No configured name: derive one from the target name.
        if (AppendMode) {
            outName = brave.Name;
        } else if (outLastNameMode) {
            outName = brave.Name + "_" + outTime.ToString(ConfigurationSettings.AppSettings["OUT_FILELAST_NAME_FORMAT"]);
        } else {
            outName = brave.Name + "_" + pInfo.Name.Substring(0, pInfo.Name.Length - 4);
        }
    } else if (!AppendMode) {
        // Configured name: in append mode it is used as is, otherwise a suffix is added.
        if (outLastNameMode) {
            outName = outName + "_" + outTime.ToString(ConfigurationSettings.AppSettings["OUT_FILELAST_NAME_FORMAT"]);
        } else {
            outName = outName + logCount.ToString(ConfigurationSettings.AppSettings["OUT_FORMAT"]);
            logCount++;
        }
    }

    string fileName = outPath + "/" + outName + ".html";

    // The using block releases the file handle even when Write throws.
    using (StreamWriter sw = new StreamWriter(fileName, true, System.Text.Encoding.GetEncoding(932))) {
        sw.Write(buf);
    }

    Boolean dateMode = System.Configuration.ConfigurationSettings.AppSettings["DATE_MODE"].Equals("TRUE");
    // Only stamp the file when a real date was parsed: the default DateTime value is outside
    // the range that the file time APIs accept and would make SetLastWriteTime throw.
    if (dateMode && outTimeParsed) {
        File.SetLastWriteTime(fileName, outTime);
    }
}
// Builds the output text for one thread and returns it as a StringBuilder.
// When TagMode is on, a leading fragment that includes the CSS_FILE setting and a trailing
// fragment are emitted around the body. The body is the thread title followed by every post
// whose flag is not FLAG_NO_OUTPUT: number, name, optional trip, mail, date, ID and text, with
// a flag-specific fragment appended before the text.
// NOTE(review): several string literals in this method are empty, unterminated or split across
// lines in this copy of the file. They look like markup that was lost; the method cannot be
// compiled as shown - restore the literals from the original source.
private static StringBuilder MakeHtml(Sure wSure) {
StringBuilder buf = new StringBuilder();
if (TagMode) {
buf.Append("\n");
buf.Append(");
buf.Append(ConfigurationSettings.AppSettings["CSS_FILE"]);
buf.Append("\">\n");
buf.Append("\n");
}
buf.Append("
");
buf.Append(wSure.Title);
buf.Append("
\n");
// One entry per post that was flagged during analysis.
foreach (Res r in wSure.Res) {
if (r.flag != Res.FLAG_NO_OUTPUT) {
buf.Append(r.No);
buf.Append(" ");
buf.Append(r.Name);
if (r.Trip == null || r.Trip.Equals("")) {
} else {
buf.Append("◆");
buf.Append(r.Trip);
}
buf.Append("[");
buf.Append(r.Mail);
buf.Append("] 投稿日:");
buf.Append(r.date);
buf.Append(" ID:");
buf.Append(r.id);
switch (r.flag) {
case Res.FLAG_RES:
buf.Append("");
break;
case Res.FLAG_UNC:
buf.Append(
"");
break;
case Res.FLAG_YRES:
buf.Append(
"");
break;
case Res.FLAG_OTHER:
buf.Append(
"");
break;
}
buf.Append(r.text);
buf.Append("
\n");
}
}
// Trailing fragment, emitted only in tag mode.
if (TagMode) {
buf.Append(
"\n");
}
return buf;
}
/// <summary>
/// The poster being collected: a display name plus the trips and IDs that are accepted as
/// belonging to that poster, and the trips and IDs that must never be accepted.
/// </summary>
private class Brave {
    // Backing store for Name. This is an instance field so that every Brave keeps its own
    // name; it used to be static, which made all instances share a single value.
    private string name_;

    /// <summary>Name that post names are compared against.</summary>
    public string Name {
        get {
            return name_;
        }
        set {
            name_ = value;
        }
    }

    /// <summary>Trips accepted as the target poster.</summary>
    public List<string> Trips = new List<string>();

    /// <summary>IDs accepted as the target poster.</summary>
    public List<string> IDs = new List<string>();

    /// <summary>Trips that are excluded even when other rules would match.</summary>
    public List<string> DummyTrips = new List<string>();

    /// <summary>IDs that are excluded even when other rules would match.</summary>
    public List<string> DummyIDs = new List<string>();
}
/// <summary>
/// One parsed thread: its title, its posts and the result of the flagging passes.
/// </summary>
class Sure {
    /// <summary>Thread title, taken from the first line of the log file.</summary>
    public string Title;

    /// <summary>
    /// Posts in file order. The element type must be Res: callers index into the list and
    /// read Res members directly, and enumerate it with a Res loop variable.
    /// </summary>
    public List<Res> Res = new List<Res>();

    /// <summary>0 until at least one post is flagged for output, then 1.</summary>
    public int flag = 0;

    /// <summary>Date text of the first flagged post; null when nothing was flagged.</summary>
    public string date = null;
}
/// <summary>
/// One post of a thread together with the flag that decides whether and how it is output.
/// </summary>
class Res {
    // Flag values stored in the flag field.
    public const int
        FLAG_NO_OUTPUT = 0, // default; posts with this flag are left out of the output
        FLAG_RES = 1,       // poster matched the target by name, trip or ID
        FLAG_UNC = 2,       // requested through SetFlag for posts reached from a flagged post
        FLAG_YRES = 3,      // assigned by SetFlag when the reached post carries a trip
        FLAG_OTHER = 4;     // remaining related posts (anchor targets, enforced-mode name hits)

    // Post number, and the current classification (one of the FLAG_ constants).
    public int No;
    public int flag = FLAG_NO_OUTPUT;

    // Poster fields: name without the trip part, trip (null when absent), mail field.
    public string Name, Trip, Mail;

    // Date text, poster ID (null when the line carried none) and post body.
    public string date, id, text;
}
}
}