Из парсера xml в парсер html

Discussion in 'С/С++, C#, Rust, Swift, Go, Java, Perl, Ruby' started by F&D, 30 Jun 2010.

  1. F&D

    F&D Member

    Joined:
    6 Nov 2008
    Messages:
    341
    Likes Received:
    8
    Reputations:
    5
    Здрасте всем!
    Написал парсер XML, теперь хочу сделать по его подобию парсер HTML-страниц: чтобы ввёл страницу — и её распарсило. Да только вот туго у меня с этим =(

    Code:
            // XML document displayed by this form; created and loaded in the Form1 constructor.
            private XmlDocument mDocument;
            // Context node that the XPath queries in the event handlers are evaluated against;
            // several handlers reassign it to the node they selected.
            private XmlNode mCurrentNode;
            /// <summary>
            /// Builds the form, loads the XML document and makes the document's
            /// root element the current node for subsequent queries.
            /// </summary>
            public Form1()
            {
                InitializeComponent();

                // The literal below reads "path to file" - it looks like a placeholder
                // that has to be replaced with a real path.
                this.mDocument = new XmlDocument();
                this.mDocument.Load(@"путь к файлу");

                // Queries start from the root element of the loaded document.
                this.mCurrentNode = this.mDocument.DocumentElement;

                this.ClearListBox();
            }
            /// <summary>
            /// Lists every node of <paramref name="nodeList"/> in the result list box.
            /// Each node is shown with its own subtree, starting at indent 0; the
            /// node's siblings are not followed.
            /// </summary>
            /// <param name="nodeList">Nodes to display, in list order.</param>
            private void DisplayList(XmlNodeList nodeList)
            {
                int total = nodeList.Count;
                for (int index = 0; index < total; index++)
                {
                    RecurseXmlDocumentNoSiblings(nodeList[index], 0);
                }
            }
            /// <summary>
            /// Adds <paramref name="root"/> to the result list box, left-padded by
            /// <paramref name="indent"/> spaces, then lists its children two spaces
            /// deeper via <c>RecurseXmlDocument</c>. Siblings of
            /// <paramref name="root"/> itself are not visited.
            /// </summary>
            /// <param name="root">Node to display; null is ignored.</param>
            /// <param name="indent">Number of spaces placed in front of the entry.</param>
            private void RecurseXmlDocumentNoSiblings(XmlNode root, int indent)
            {
                // Nothing to do when root is null.
                if (root == null)
                {
                    return;
                }

                // Elements are shown by name, followed by their children.
                XmlElement element = root as XmlElement;
                if (element != null)
                {
                    string label = element.Name;
                    listBoxResult.Items.Add(label.PadLeft(label.Length + indent));

                    if (element.HasChildNodes)
                    {
                        RecurseXmlDocument(element.FirstChild, indent + 2);
                    }
                    return;
                }

                // Text nodes are shown by their value.
                XmlText textNode = root as XmlText;
                if (textNode != null)
                {
                    string content = textNode.Value;
                    listBoxResult.Items.Add(content.PadLeft(content.Length + indent));
                    return;
                }

                // Comments are shown by their value as well.
                XmlComment comment = root as XmlComment;
                if (comment != null)
                {
                    string remark = comment.Value;
                    listBoxResult.Items.Add(remark.PadLeft(remark.Length + indent));

                    // NOTE(review): comment nodes are not expected to have children;
                    // the check is kept so the behavior matches the previous version.
                    if (comment.HasChildNodes)
                    {
                        RecurseXmlDocument(comment.FirstChild, indent + 2);
                    }
                }
            }
            /// <summary>
            /// Lists <paramref name="root"/> and all of its following siblings in the
            /// result list box, each left-padded by <paramref name="indent"/> spaces,
            /// descending into the children of every element with two extra spaces.
            /// </summary>
            /// <remarks>
            /// Elements are shown by name; text and comment nodes by value. Other node
            /// kinds (CDATA, whitespace, processing instructions, ...) are not shown,
            /// but they no longer stop the walk: previously a text node or any
            /// unsupported node ended the traversal of its sibling chain, so mixed
            /// content such as <c>text &lt;b&gt;bold&lt;/b&gt; more text</c> was cut
            /// off after the first text node. Siblings are visited in a loop rather
            /// than by recursion, so the call depth depends only on nesting depth,
            /// not on the number of siblings.
            /// </remarks>
            /// <param name="root">First node to display; null is ignored.</param>
            /// <param name="indent">Number of spaces placed in front of each entry.</param>
            private void RecurseXmlDocument(XmlNode root, int indent)
            {
                for (XmlNode node = root; node != null; node = node.NextSibling)
                {
                    if (node is XmlElement)
                    {
                        string name = node.Name;
                        listBoxResult.Items.Add(name.PadLeft(name.Length + indent));

                        // FirstChild is null for an element without children, in
                        // which case the nested call returns immediately.
                        RecurseXmlDocument(node.FirstChild, indent + 2);
                    }
                    else if (node is XmlText || node is XmlComment)
                    {
                        string text = node.Value;
                        listBoxResult.Items.Add(text.PadLeft(text.Length + indent));
                    }
                    // Any other node kind is skipped; the loop moves on to the next sibling.
                }
            }
            /// <summary>
            /// Removes every entry from the result list box.
            /// </summary>
            private void ClearListBox()
            {
                this.listBoxResult.Items.Clear();
            }
    
            /// <summary>
            /// Click handler for button1: requests that the application exit.
            /// </summary>
            private void button1_Click(object sender, EventArgs e)
            {
                Application.Exit();
            }
    
            /// <summary>
            /// Makes the node returned by the XPath query "//books" the current node
            /// and lists it, together with its following siblings and their subtrees.
            /// </summary>
            private void radioButton1_CheckedChanged(object sender, EventArgs e)
            {
                XmlElement documentElement = mDocument.DocumentElement;
                XmlNode books = documentElement.SelectSingleNode("//books");

                // The current node is replaced even when the query found nothing.
                mCurrentNode = books;

                ClearListBox();
                RecurseXmlDocument(books, 0);
            }
    
            /// <summary>
            /// Lists the nodes matched by the XPath query "//book/author", evaluated
            /// from the current node. Without a current node the list is only cleared.
            /// </summary>
            private void radioButton2_CheckedChanged(object sender, EventArgs e)
            {
                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                // Run the query first so the old list is still visible if it throws.
                XmlNodeList authors = mCurrentNode.SelectNodes("//book/author");
                ClearListBox();
                DisplayList(authors);
            }
    
            /// <summary>
            /// Lists the nodes matched by the XPath query
            /// "//book[author='Jacob Hammer Pedersen']", evaluated from the current
            /// node. Without a current node the list is only cleared.
            /// </summary>
            private void radioButton3_CheckedChanged(object sender, EventArgs e)
            {
                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                // Run the query before clearing, as the original ordering did.
                XmlNodeList booksByAuthor = mCurrentNode.SelectNodes("//book[author='Jacob Hammer Pedersen']");
                ClearListBox();
                DisplayList(booksByAuthor);
            }
    
            /// <summary>
            /// Lists the nodes matched by the XPath query "//item", evaluated from the
            /// current node. Without a current node the list is only cleared.
            /// </summary>
            private void radioButton4_CheckedChanged(object sender, EventArgs e)
            {
                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                // Query, then clear, then display - same order as before.
                XmlNodeList items = mCurrentNode.SelectNodes("//item");
                ClearListBox();
                DisplayList(items);
            }
    
            /// <summary>
            /// Narrows the current node to its child matched by the XPath query
            /// "book[title='Beginnig Visual C#']" and lists that node with its subtree.
            /// </summary>
            /// <remarks>
            /// The current node is replaced only when the query finds a match.
            /// CheckedChanged is raised when the button is unchecked as well as when
            /// it is checked; on the second call the relative query runs against the
            /// already-narrowed node and finds nothing. Previously that null result
            /// overwrote the current node, after which every handler that requires a
            /// current node could only clear the list.
            /// </remarks>
            private void radioButton5_CheckedChanged(object sender, EventArgs e)
            {
                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                // NOTE(review): the title is spelled "Beginnig" - confirm that this
                // matches the spelling used in the XML data.
                XmlNode match = mCurrentNode.SelectSingleNode("book[title='Beginnig Visual C#']");
                ClearListBox();

                if (match != null)
                {
                    // Narrow the query context only on success so a miss cannot
                    // leave the form without a current node.
                    mCurrentNode = match;
                    RecurseXmlDocumentNoSiblings(match, 0);
                }
            }
    
            /// <summary>
            /// Makes the node returned by the XPath query "//books" the current node
            /// and lists it with its subtree (siblings are not followed). Without a
            /// current node the list is only cleared.
            /// </summary>
            private void radioButton6_CheckedChanged(object sender, EventArgs e)
            {
                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                // The current node is replaced even when the query found nothing.
                mCurrentNode = mCurrentNode.SelectSingleNode("//books");
                ClearListBox();
                RecurseXmlDocumentNoSiblings(mCurrentNode, 0);
            }
    
            /// <summary>
            /// Lists the nodes matched by the XPath query "*", evaluated from the
            /// current node. Without a current node the list is only cleared.
            /// </summary>
            private void radioButton7_CheckedChanged(object sender, EventArgs e)
            {
                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                // Query, then clear, then display - same order as before.
                XmlNodeList children = mCurrentNode.SelectNodes("*");
                ClearListBox();
                DisplayList(children);
            }
    
            /// <summary>
            /// Narrows the current node to its child matched by the XPath query
            /// "book[@pages='1000']" and lists that node with its subtree.
            /// </summary>
            /// <remarks>
            /// The current node is replaced only when the query finds a match.
            /// CheckedChanged is raised when the button is unchecked as well as when
            /// it is checked; on the second call the relative query runs against the
            /// already-narrowed node and finds nothing. Previously that null result
            /// overwrote the current node, after which every handler that requires a
            /// current node could only clear the list.
            /// </remarks>
            private void radioButton8_CheckedChanged(object sender, EventArgs e)
            {
                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                XmlNode match = mCurrentNode.SelectSingleNode("book[@pages='1000']");
                ClearListBox();

                if (match != null)
                {
                    // Narrow the query context only on success so a miss cannot
                    // leave the form without a current node.
                    mCurrentNode = match;
                    RecurseXmlDocumentNoSiblings(match, 0);
                }
            }
            
    
    
            /// <summary>
            /// Runs the XPath expression typed into the query text box against the
            /// current node and lists the matching nodes.
            /// </summary>
            /// <remarks>
            /// An empty query is ignored. When there is no current node the list is
            /// cleared, as the radio-button handlers do, instead of letting a
            /// NullReferenceException surface as an unhelpful message box. Errors
            /// raised while evaluating the expression are shown to the user.
            /// </remarks>
            private void button2_Click(object sender, EventArgs e)
            {
                string query = textBoxQuery.Text;
                if (string.IsNullOrEmpty(query))
                {
                    return;
                }

                if (mCurrentNode == null)
                {
                    ClearListBox();
                    return;
                }

                try
                {
                    // Evaluate first: if the expression is invalid the previous
                    // results stay visible behind the error message.
                    XmlNodeList nodeList = mCurrentNode.SelectNodes(query);
                    ClearListBox();
                    DisplayList(nodeList);
                }
                catch (System.Exception err)
                {
                    // Deliberately broad: the expression is free-form user input and
                    // any failure is reported rather than allowed to crash the form.
                    MessageBox.Show(err.Message);
                }
            }
    
    
        }
    }
    
    Помогите, пожалуйста, преобразовать парсер XML в парсер HTML...