Cleanup and testing
This commit is contained in:
parent
8bcd07bb0b
commit
09d8ca3f2f
4 changed files with 9 additions and 88 deletions
|
@ -1,52 +1,23 @@
|
|||
package org.openaudible.audible;
|
||||
|
||||
import com.gargoylesoftware.htmlunit.html.DomNode;
|
||||
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
|
||||
import com.gargoylesoftware.htmlunit.html.HtmlPage;
|
||||
import com.google.gson.JsonArray;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.openaudible.books.Book;
|
||||
import org.openaudible.books.BookElement;
|
||||
import org.openaudible.util.HTMLUtil;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public enum BookPageParser {
|
||||
instance;
|
||||
|
||||
instance; // Singleton
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(BookPageParser.class);
|
||||
/**
 * Looks up a node with {@code HTMLUtil.findByClass(c, h)} and returns the
 * result of passing that node to {@code HTMLUtil.text}.
 *
 * <p>This method adds no logic of its own; it only chains the two helpers.
 * NOTE(review): {@code extractParagraph} null-checks the result of the same
 * lookup, so {@code findByClass} can presumably return {@code null}; what
 * {@code HTMLUtil.text} does in that case is not visible here — confirm.
 *
 * @param c first argument handed to {@code HTMLUtil.findByClass}
 *          (presumably a CSS class name — confirm against HTMLUtil)
 * @param h second argument handed to {@code HTMLUtil.findByClass}
 *          (presumably the node to search beneath — confirm against HTMLUtil)
 * @return whatever {@code HTMLUtil.text} produces for the located node
 */
public String extract(String c, DomNode h) {
|
||||
return HTMLUtil.text(HTMLUtil.findByClass(c, h));
|
||||
}
|
||||
|
||||
public String extractParagraph(String c, DomNode h) {
|
||||
String out = "";
|
||||
DomNode node = (DomNode) HTMLUtil.findByClass(c, h);
|
||||
if (node != null) {
|
||||
NodeList cn = node.getChildNodes();
|
||||
for (int x = 0; x < cn.getLength(); x++) {
|
||||
Node y = cn.item(x);
|
||||
String text = y.getTextContent();
|
||||
if (text != null) {
|
||||
text = text.trim();
|
||||
if (out.length() > 0)
|
||||
out += "\n";
|
||||
out += text;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
// Audible pages use a lot of CDATA sections, and their contents are useful.
|
||||
List<String> getCDATATags(String html)
|
||||
{
|
||||
ArrayList<String> list = new ArrayList<>();
|
||||
|
@ -72,7 +43,7 @@ public enum BookPageParser {
|
|||
|
||||
public boolean parseBookPage(HtmlPage page, Book b) {
|
||||
DomNode h = page;
|
||||
HTMLUtil.debugNode(page, "book_info");
|
||||
// HTMLUtil.debugNode(page, "book_info");
|
||||
String xml = page.asXml();
|
||||
List<String> cdataList = getCDATATags(xml);
|
||||
for (String cd:cdataList)
|
||||
|
@ -102,33 +73,7 @@ public enum BookPageParser {
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
"image": "https://m.media-amazon.com/images/I/51u1om96bmL._SL500_.jpg",
|
||||
"@type": "Audiobook",
|
||||
"author": [{
|
||||
"@type": "Person",
|
||||
"name": "Amanda Hodgkinson"
|
||||
}],
|
||||
"readBy": [{
|
||||
"@type": "Person",
|
||||
"name": "Robin Sachs"
|
||||
}],
|
||||
"description": "<p>A tour de force that echoes modern classics like <i>Suite Francaise<\/i> and <i>The Postmistress<\/i>. <\/p><p>\"Housekeeper or housewife?\" the soldier asks Silvana as she and eight-year-old Aurek board the ship that will take them from Poland to England at the end of World War II. There her husband, Janusz, is already waiting for them at the little house at 22 Britannia Road. But the war has changed them all so utterly that they'll barely recognize one another when they are reunited. \"Survivor,\" she answers.<\/p><p>Silvana and Aurek spent the war hiding in the forests of Poland. Wild, almost feral Aurek doesn't know how to tie his own shoes or sleep in a bed. Janusz is an Englishman now-determined to forget Poland, forget his own ghosts from the way, and begin a new life as a proper English family. But for Silvana, who cannot escape the painful memory of a shattering wartime act, forgetting is not a possibility.<\/p>",
|
||||
"abridged": "false",
|
||||
"inLanguage": "english",
|
||||
"bookFormat": "AudiobookFormat",
|
||||
"@context": "https://schema.org",
|
||||
"datePublished": "2011-04-28",
|
||||
"duration": "PT11H19M",
|
||||
"name": "22 Britannia Road",
|
||||
"publisher": "Penguin Audio",
|
||||
"aggregateRating": {
|
||||
"@type": "AggregateRating",
|
||||
"ratingValue": "3.6842105263157894",
|
||||
"ratingCount": "171"
|
||||
}
|
||||
*/
|
||||
|
||||
// Right now we only care about the @type AudioBook metadata.
|
||||
private void extractFromJSON(JSONObject obj, Book b) {
|
||||
String typ = obj.optString("@type");
|
||||
if (typ == null || typ.isEmpty())
|
||||
|
@ -136,14 +81,12 @@ public enum BookPageParser {
|
|||
if (!"AudioBook".equalsIgnoreCase(typ)) // && !"Product".equalsIgnoreCase(typ))
|
||||
return;
|
||||
|
||||
LOG.info(obj.toString(2));
|
||||
// LOG.info(obj.toString(2));
|
||||
|
||||
for (String k:obj.keySet())
|
||||
{
|
||||
System.out.println(k+" = "+ obj.get(k));
|
||||
Object value = obj.get(k);
|
||||
String str = value!=null ? value.toString():"";
|
||||
|
||||
BookElement elem = null;
|
||||
|
||||
switch(k)
|
||||
|
@ -195,7 +138,7 @@ public enum BookPageParser {
|
|||
elem = BookElement.publisher;
|
||||
break;
|
||||
default:
|
||||
LOG.info("Skipping "+k+" = "+ str);
|
||||
// LOG.info("Skipping "+k+" = "+ str);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -206,12 +149,7 @@ public enum BookPageParser {
|
|||
b.set(elem, str);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -224,6 +162,7 @@ public enum BookPageParser {
|
|||
// "name": "Robin Racer"
|
||||
// }],
|
||||
|
||||
// array of 'person' objects.
|
||||
private String personToString(JSONArray arr) {
|
||||
String out = "";
|
||||
for (int x=0;x<arr.length();x++)
|
||||
|
@ -238,7 +177,6 @@ public enum BookPageParser {
|
|||
out += name;
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
|
|
@ -124,7 +124,7 @@ public enum LibraryParser {
|
|||
if (r.getCells().size() != BookColumns.size()) {
|
||||
LOG.error("wrong number of columns found: " + r.getCells().size() + " != " + BookColumns.size());
|
||||
LOG.error(xml);
|
||||
HTMLUtil.debugNode(r, "bad_col.xml");
|
||||
if (debug) HTMLUtil.debugNode(r, "bad_col.xml");
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -201,7 +201,7 @@ public enum LibraryParser {
|
|||
if (text.contains("by parts"))
|
||||
{
|
||||
LOG.error("error with title: "+text);
|
||||
HTMLUtil.debugNode(cell, col.name()+".xml");
|
||||
if (debug) HTMLUtil.debugNode(cell, col.name()+".xml");
|
||||
// bug check.
|
||||
}
|
||||
|
||||
|
|
|
@ -71,14 +71,8 @@ public class Book implements Comparable<Book>, Serializable {
|
|||
public boolean equals(Book that) {
|
||||
if (that==null) return false;
|
||||
if (this==that) return true;
|
||||
|
||||
boolean e1 = this.getProduct_id().equals(that.getProduct_id());
|
||||
// boolean e2 = this.getAsin().equals(that.getAsin());
|
||||
|
||||
// assert (e1 == e2);
|
||||
|
||||
return e1;
|
||||
|
||||
}
|
||||
|
||||
public boolean isOK() {
|
||||
|
|
|
@ -142,16 +142,6 @@ public class BookInfoPanel extends GridComposite implements BookListener {
|
|||
return c;
|
||||
}
|
||||
|
||||
private void updateTask(Book b) {
|
||||
String t = "";
|
||||
if (curBook != null) {
|
||||
|
||||
|
||||
}
|
||||
|
||||
task.setText(t);
|
||||
|
||||
}
|
||||
|
||||
private void update(Book b) {
|
||||
curBook = b;
|
||||
|
@ -311,7 +301,6 @@ public class BookInfoPanel extends GridComposite implements BookListener {
|
|||
task.setText(msg);
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue