Google 翻譯爬蟲（Java）

package com.fly.design.translation.google;


import com.fly.design.translation.Language;
import com.fly.design.translation.Translator;
import org.junit.Test;

import java.io.IOException;

import static org.junit.Assert.*;

/**
 * Exercises {@link GoogleTranslator} through the {@link Translator} interface.
 *
 * <p>Every assertion calls {@code translate} on a real {@code GoogleTranslator} instance
 * and compares the answer with a fixed expected string.
 *
 * @author weijun.zou
 * Created on 2018/1/18
 */
public class GoogleTranslatorTest {

    /**
     * Translates two Chinese words into English and two English words into Chinese,
     * checking each result in turn.
     *
     * @throws IOException          propagated from {@link Translator#translate}
     * @throws InterruptedException propagated from {@link Translator#translate}
     */
    @Test
    public void translate() throws IOException, InterruptedException {
        final Translator google = new GoogleTranslator();

        // Chinese -> English
        final String dollars = google.translate("美元", Language.CHINESE, Language.ENGLISH);
        assertEquals("Dollars", dollars);

        final String peaceful = google.translate("平安", Language.CHINESE, Language.ENGLISH);
        assertEquals("Peaceful", peaceful);

        // English -> Chinese
        final String meiYuan = google.translate("Dollars", Language.ENGLISH, Language.CHINESE);
        assertEquals("美元", meiYuan);

        final String article = google.translate("a", Language.ENGLISH, Language.CHINESE);
        assertEquals("一個", article);
    }
}
package com.fly.design.translation;

/**
 * Languages that can be passed to a {@link Translator} as source or target.
 *
 * @author weijun.zou
 * Created on 2018/1/17
 */
public enum Language {

    /** Chinese. */
    CHINESE,

    /** English. */
    ENGLISH
}
package com.fly.design.translation;

import java.io.IOException;

/**
 * Contract for translating a piece of text from one {@link Language} into another.
 *
 * @author weijun.zou
 * Created on 2018/1/17
 */
public interface Translator {
    /**
     * Translates {@code value} from the {@code input} language into the {@code output} language.
     *
     * @param value  the text to translate
     * @param input  the language {@code value} is written in
     * @param output the language to translate into
     * @return the translated text
     * @throws IOException          declared so implementations may report I/O failures
     * @throws InterruptedException declared so implementations may report interruption
     */
    String translate(String value, Language input,Language output) throws IOException, InterruptedException;
}
package com.fly.design.translation.google;

import com.fly.design.translation.Language;
import com.fly.design.translation.Translator;

import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import static com.fly.design.translation.Language.*;

/**
 * {@link Translator} that sends an HTTP GET to the {@code translate_a/single} endpoint of
 * {@code translate.google.cn} and returns the first quoted string of the response body.
 *
 * <p>Every request carries the fixed query parameters in {@code ARG_LIST}, the source and
 * target language codes ({@code sl}/{@code tl}), the text ({@code q}) and a checksum of the
 * text ({@code tk}) computed by {@code getTk}.
 *
 * <p>NOTE(review): the endpoint is not a documented public API. The response is presumably a
 * JSON array whose first string literal is the translation of the first segment of the input;
 * confirm before relying on this for multi-sentence text.
 *
 * @author weijun.zou
 * Created on 2018/1/17
 */
public class GoogleTranslator implements Translator {

    private static final Logger log = LoggerFactory.getLogger(GoogleTranslator.class);

    /** Full URL of the translation endpoint (scheme, host and path). */
    private static final String PATH = "https://translate.google.cn/translate_a/single";

    /** Query parameters that are identical for every request. */
    private static final NameValuePair[] ARG_LIST = {
            new BasicNameValuePair("client", "t"),
            new BasicNameValuePair("hl", toArgs(Language.CHINESE)),
            new BasicNameValuePair("dt", "at"),
            new BasicNameValuePair("dt", "bd"),
            new BasicNameValuePair("dt", "ex"),
            new BasicNameValuePair("dt", "ld"),
            new BasicNameValuePair("dt", "md"),
            new BasicNameValuePair("dt", "qca"),
            new BasicNameValuePair("dt", "rw"),
            new BasicNameValuePair("dt", "rm"),
            new BasicNameValuePair("dt", "ss"),
            new BasicNameValuePair("dt", "t"),
            new BasicNameValuePair("ie", "UTF-8"),
            new BasicNameValuePair("oe", "UTF-8")
    };

    /** Keeps a value inside the unsigned 32-bit range. */
    private static final long UINT32_MASK = 0xFFFFFFFFL;

    /** Mixing recipe applied after each input byte is added to the running token. */
    private static final String TK_STEP_KEY = "+-a^+6";

    /** Mixing recipe applied once after all input bytes have been consumed. */
    private static final String TK_FINAL_KEY = "+-3^+b+-f";

    /**
     * Start value of the running token; also XOR-ed into the fractional part of the result.
     * NOTE(review): this and {@code TK_XOR_MASK} look like a server-issued seed that may change
     * over time; verify the hard-coded values are still accepted.
     */
    private static final long TK_SEED = 406644L;

    /** Value XOR-ed into the token after the final mixing round. */
    private static final long TK_XOR_MASK = 3293161072L;

    /** The token is reduced modulo this value before it is printed. */
    private static final long TK_MODULUS = 1_000_000L;

    private final HttpClient client = HttpClients.createDefault();

    /**
     * Translates {@code value} by querying the remote endpoint.
     *
     * @param value  text to translate; leading and trailing whitespace is trimmed before sending
     * @param input  language of {@code value}
     * @param output language to translate into
     * @return the first string literal found in the response body, with backslash escapes resolved
     * @throws IOException          if the request fails, the server answers with a non-2xx status,
     *                              or the body contains no complete quoted string
     * @throws InterruptedException never thrown here; declared by {@link Translator}
     * @throws NullPointerException if {@code value} is {@code null}
     */
    @Override
    public String translate(String value, Language input, Language output)
            throws IOException, InterruptedException {
        HttpGet httpGet = new HttpGet(createURI(value.trim(), input, output));
        return client.execute(httpGet, response -> {
            // Without this check an error page would be parsed as if it were a translation.
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                throw new IOException("Google translate request failed with HTTP status " + status);
            }
            if (response.getEntity() == null) {
                throw new IOException("Google translate response has no body");
            }
            // "oe=UTF-8" is requested, so fall back to UTF-8 when the response names no charset.
            String data = EntityUtils.toString(response.getEntity(), "UTF-8");
            log.info(data);
            return firstQuotedString(data);
        });
    }

    /**
     * Returns the content of the first double-quoted string in {@code data}.
     *
     * <p>Backslash escapes inside the string are resolved ({@code \n}, {@code \t}, {@code \r},
     * {@code \b}, {@code \f}, {@code \}{@code uXXXX}; any other escaped character stands for
     * itself), so an escaped quote no longer ends the string early.
     *
     * @param data response body
     * @return the unescaped string content
     * @throws IOException if {@code data} holds no opening quote, no closing quote,
     *                     or a malformed unicode escape
     */
    private static String firstQuotedString(String data) throws IOException {
        int open = data.indexOf('"');
        if (open < 0) {
            throw new IOException("No quoted string in google translate response");
        }
        StringBuilder text = new StringBuilder();
        int i = open + 1;
        while (i < data.length()) {
            char c = data.charAt(i++);
            if (c == '"') {
                return text.toString();
            }
            if (c != '\\' || i >= data.length()) {
                text.append(c);
                continue;
            }
            char escaped = data.charAt(i++);
            switch (escaped) {
                case 'n':
                    text.append('\n');
                    break;
                case 't':
                    text.append('\t');
                    break;
                case 'r':
                    text.append('\r');
                    break;
                case 'b':
                    text.append('\b');
                    break;
                case 'f':
                    text.append('\f');
                    break;
                case 'u':
                    if (i + 4 > data.length()) {
                        throw new IOException("Truncated unicode escape in google translate response");
                    }
                    try {
                        text.append((char) Integer.parseInt(data.substring(i, i + 4), 16));
                    } catch (NumberFormatException e) {
                        throw new IOException("Malformed unicode escape in google translate response", e);
                    }
                    i += 4;
                    break;
                default:
                    // Covers \" \\ and \/ : the escaped character stands for itself.
                    text.append(escaped);
                    break;
            }
        }
        throw new IOException("Unterminated quoted string in google translate response");
    }

    /**
     * Builds the request URI: endpoint URL, fixed parameters, then {@code sl}, {@code tl},
     * {@code tk} and {@code q}.
     *
     * @param value  text to translate
     * @param input  source language
     * @param output target language
     * @return the complete request URI
     * @throws RuntimeException wrapping the {@link URISyntaxException} if the URI cannot be built
     */
    private static URI createURI(String value, Language input, Language output) {
        try {
            // PATH is a complete URL, so it must be parsed by the constructor;
            // setPath() is meant for the path component only.
            return new URIBuilder(PATH)
                    .setParameters(ARG_LIST)
                    .addParameters(List.of(
                            new BasicNameValuePair("sl", toArgs(input)),
                            new BasicNameValuePair("tl", toArgs(output)),
                            new BasicNameValuePair("tk", getTk(value)),
                            new BasicNameValuePair("q", value)
                    )).build();
        } catch (URISyntaxException e) {
            log.error("構建google翻譯url出錯", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Computes the {@code tk} request parameter for {@code values}.
     *
     * <p>The text is turned into UTF-8 byte values; each byte is added to a running token that
     * is then mixed with {@code TK_STEP_KEY}. After a final mixing round with
     * {@code TK_FINAL_KEY} the token is XOR-ed with {@code TK_XOR_MASK}, reduced modulo
     * {@code TK_MODULUS} and printed as {@code "<token>.<token ^ TK_SEED>"}.
     *
     * @param values text to checksum
     * @return the checksum in {@code "<integer>.<integer>"} form
     */
    private static String getTk(String values) {
        long token = TK_SEED;
        // Order matters (the mixing step is not associative), hence a plain loop, not reduce().
        for (long unit : toNums(values.toCharArray())) {
            token = encode(token + unit, TK_STEP_KEY);
        }
        // Mask instead of branching on the sign: the token is always treated as unsigned 32-bit.
        token = (encode(token, TK_FINAL_KEY) ^ TK_XOR_MASK) & UINT32_MASK;
        token %= TK_MODULUS;
        return token + "." + (token ^ TK_SEED);
    }

    /**
     * Converts UTF-16 chars to the list of their UTF-8 byte values (each 0-255).
     *
     * <p>A high surrogate directly followed by a low surrogate is combined into one code point
     * and emitted as four bytes. An unpaired surrogate is emitted as three bytes, like any other
     * char at or above U+0800.
     *
     * @param values UTF-16 chars of the text
     * @return byte values in encoding order
     */
    private static List<Long> toNums(char[] values) {
        List<Long> valueList = new ArrayList<>(values.length);
        for (int i = 0; i < values.length; i++) {
            long value = values[i];
            if (value < 0x80) {
                // 1 byte: 0xxxxxxx
                valueList.add(value);
            } else if (value < 0x800) {
                // 2 bytes: 110xxxxx 10xxxxxx
                valueList.add(value >> 6 | 0xC0);
                valueList.add(value & 0x3F | 0x80);
            } else if ((value & 0xFC00) == 0xD800
                    && i + 1 < values.length
                    && (values[i + 1] & 0xFC00) == 0xDC00) {
                // Surrogate pair -> 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                long codePoint = 0x10000 + ((value & 0x3FF) << 10) + (values[++i] & 0x3FF);
                valueList.add(codePoint >> 18 | 0xF0);
                valueList.add(codePoint >> 12 & 0x3F | 0x80);
                valueList.add(codePoint >> 6 & 0x3F | 0x80);
                valueList.add(codePoint & 0x3F | 0x80);
            } else {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                valueList.add(value >> 12 | 0xE0);
                valueList.add(value >> 6 & 0x3F | 0x80);
                valueList.add(value & 0x3F | 0x80);
            }
        }
        return valueList;
    }

    /**
     * Mixes {@code value} according to {@code key}, read three characters at a time.
     *
     * <p>In each triple the third character is the shift amount (a decimal digit, or a letter
     * from {@code 'a'} upward meaning 10, 11, ...), the second selects the shift direction
     * ({@code '+'} = unsigned right shift, otherwise left shift) and the first selects how the
     * shifted value is combined with {@code value} ({@code '+'} = addition, otherwise XOR).
     * Results are kept within the unsigned 32-bit range.
     *
     * @param value value to mix
     * @param key   recipe made of three-character steps
     * @return the mixed value, in {@code [0, 2^32)}
     */
    private static long encode(long value, String key) {
        value &= UINT32_MASK;
        for (int i = 0; i < key.length() - 2; i += 3) {
            char combine = key.charAt(i);
            char direction = key.charAt(i + 1);
            char amountChar = key.charAt(i + 2);
            int amount = amountChar >= 'a' ? amountChar - 87 : Character.digit(amountChar, 10);
            long shifted = direction == '+' ? value >>> amount : value << amount;
            value = (combine == '+' ? value + shifted : value ^ shifted) & UINT32_MASK;
        }
        return value;
    }

    /**
     * Maps a {@link Language} to the language code sent to the endpoint.
     *
     * @param language language to map
     * @return {@code "zh-CN"} for {@code CHINESE}, {@code "en"} for {@code ENGLISH},
     *         and the empty string for anything else (including {@code null})
     */
    private static String toArgs(Language language) {
        if (language == CHINESE) {
            return "zh-CN";
        }
        if (language == ENGLISH) {
            return "en";
        }
        return "";
    }
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章